Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -155,7 +155,19 @@ class LearningStyleQuiz:
|
|
| 155 |
"When learning a new skill, I prefer to:",
|
| 156 |
"When studying, I like to:",
|
| 157 |
"I prefer teachers who:",
|
| 158 |
-
"When solving problems, I:"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
]
|
| 160 |
|
| 161 |
self.options = [
|
|
@@ -166,7 +178,19 @@ class LearningStyleQuiz:
|
|
| 166 |
["Watch demonstrations", "Listen to instructions", "Read instructions", "Jump in and try it"],
|
| 167 |
["Use highlighters and diagrams", "Discuss with others", "Read and take notes", "Move around or use objects"],
|
| 168 |
["Use visual aids", "Give interesting lectures", "Provide reading materials", "Include hands-on activities"],
|
| 169 |
-
["Draw pictures or diagrams", "Talk through options", "Make lists", "Try different solutions physically"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
]
|
| 171 |
|
| 172 |
self.learning_styles = {
|
|
@@ -250,14 +274,14 @@ class LearningStyleQuiz:
|
|
| 250 |
# Initialize learning style quiz
|
| 251 |
learning_style_quiz = LearningStyleQuiz()
|
| 252 |
|
| 253 |
-
class
|
| 254 |
def __init__(self):
|
| 255 |
self.patterns = {
|
| 256 |
'student_info': re.compile(
|
| 257 |
-
r"LEGAL NAME:\s*([
|
| 258 |
r"GRADE LEVEL:\s*(\d+).*?"
|
| 259 |
r"FL STUDENT ID:\s*(\w+).*?"
|
| 260 |
-
r"CURRENT SCHOOL:\s*(\d+\s+[
|
| 261 |
re.DOTALL
|
| 262 |
),
|
| 263 |
'gpa': re.compile(
|
|
@@ -269,7 +293,7 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 269 |
re.DOTALL
|
| 270 |
),
|
| 271 |
'course': re.compile(
|
| 272 |
-
r"(\d)\s+(\w+)\s+([
|
| 273 |
re.DOTALL
|
| 274 |
),
|
| 275 |
'assessment': re.compile(
|
|
@@ -282,92 +306,58 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 282 |
'class_rank': re.compile(
|
| 283 |
r"\*\s+PERCENTILE:\s*(\d+)\s*\*\s*TOTAL NUMBER IN CLASS:\s*(\d+)",
|
| 284 |
re.DOTALL
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
)
|
| 286 |
}
|
| 287 |
|
| 288 |
def parse_transcript(self, file_path: str) -> Dict:
|
| 289 |
-
"""Parse Miami-Dade transcript PDF with
|
| 290 |
try:
|
| 291 |
# First try pdfplumber
|
|
|
|
| 292 |
with pdfplumber.open(file_path) as pdf:
|
| 293 |
-
|
| 294 |
-
|
|
|
|
| 295 |
# Fallback to PyMuPDF if text extraction is poor
|
| 296 |
-
if len(text) < 500:
|
| 297 |
doc = fitz.open(file_path)
|
| 298 |
text = ""
|
| 299 |
for page in doc:
|
| 300 |
text += page.get_text()
|
| 301 |
|
| 302 |
-
|
| 303 |
-
with open("debug_transcript.txt", "w") as f:
|
| 304 |
-
f.write(text)
|
| 305 |
-
|
| 306 |
-
return self._parse_format(text)
|
| 307 |
except Exception as e:
|
| 308 |
logger.error(f"Error parsing transcript: {str(e)}")
|
| 309 |
raise ValueError(f"Error processing transcript: {str(e)}")
|
| 310 |
|
| 311 |
-
def
|
| 312 |
-
"""Parse the transcript format
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
-
|
| 323 |
-
if not parsed_data['student_info'] or not parsed_data['course_history']:
|
| 324 |
-
raise ValueError("Incomplete data extracted from transcript")
|
| 325 |
-
|
| 326 |
-
return parsed_data
|
| 327 |
-
except Exception as e:
|
| 328 |
-
logger.error(f"Format parsing error: {str(e)}")
|
| 329 |
-
return self._parse_alternative_format(text)
|
| 330 |
-
|
| 331 |
-
def _parse_alternative_format(self, text: str) -> Dict:
    """Fallback parser that pulls only a name and a single GPA from the text.

    Args:
        text: Transcript text to search.

    Returns:
        A dict with the keys ``student_info``, ``academic_summary``,
        ``course_history``, ``assessments`` and ``format`` (always
        ``'alternative'``). ``student_info['name']`` is set only when a
        ``NAME: LAST, FIRST`` style match is found; ``academic_summary['gpa']``
        is set only when a ``GPA:`` value is found, and the same number is
        stored under both ``district`` and ``state``.

    Raises:
        ValueError: If anything fails while extracting (the failure is
            logged first).
    """
    try:
        student_info = {}
        academic_summary = {}

        # Name: upper-case "LAST, FIRST"; the comma is replaced by a space.
        found_name = re.search(r"NAME:\s*([A-Z]+,\s*[A-Z]+)", text)
        if found_name is not None:
            raw_name = found_name.group(1)
            student_info['name'] = raw_name.replace(',', ' ').strip()

        # GPA: only one figure is available, so it fills both slots.
        found_gpa = re.search(r"GPA:\s*([\d.]+)", text)
        if found_gpa is not None:
            gpa_text = found_gpa.group(1)
            academic_summary['gpa'] = {
                'district': float(gpa_text),
                'state': float(gpa_text),
            }

        return {
            'student_info': student_info,
            'academic_summary': academic_summary,
            'course_history': [],
            'assessments': {},
            'format': 'alternative',
        }
    except Exception as e:
        logger.error(f"Alternative parser failed: {str(e)}")
        raise ValueError("Could not parse transcript in any supported format")
|
| 359 |
|
| 360 |
def _parse_student_info(self, text: str) -> Dict:
|
| 361 |
"""Extract student information with improved pattern matching"""
|
| 362 |
match = self.patterns['student_info'].search(text)
|
| 363 |
if not match:
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
if not match:
|
| 367 |
-
return {}
|
| 368 |
-
|
| 369 |
return {
|
| 370 |
-
'name': match.group(1).
|
| 371 |
'grade': match.group(2) if match and len(match.groups()) > 1 else "Unknown",
|
| 372 |
'student_id': match.group(3) if match and len(match.groups()) > 2 else "Unknown",
|
| 373 |
'school': match.group(4).strip() if match and len(match.groups()) > 3 else "Unknown",
|
|
@@ -376,55 +366,35 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 376 |
}
|
| 377 |
|
| 378 |
def _extract_birth_date(self, text: str) -> Optional[str]:
|
| 379 |
-
"""Extract birth date from transcript
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
r"DATE OF BIRTH:\s*([^\n]+)"
|
| 384 |
-
]
|
| 385 |
-
|
| 386 |
-
for pattern in patterns:
|
| 387 |
-
birth_match = re.search(pattern, text)
|
| 388 |
-
if birth_match:
|
| 389 |
-
return birth_match.group(1)
|
| 390 |
return None
|
| 391 |
|
| 392 |
def _extract_ethnicity(self, text: str) -> Optional[str]:
|
| 393 |
-
"""Extract ethnicity information
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
r"DEMOGRAPHICS.*?ETHNICITY:\s*([^\n]+)"
|
| 398 |
-
]
|
| 399 |
-
|
| 400 |
-
for pattern in patterns:
|
| 401 |
-
eth_match = re.search(pattern, text, re.DOTALL)
|
| 402 |
-
if eth_match:
|
| 403 |
-
return eth_match.group(1).strip()
|
| 404 |
return None
|
| 405 |
|
| 406 |
def _parse_academic_summary(self, text: str) -> Dict:
|
| 407 |
-
"""Parse academic summary section
|
| 408 |
summary = {
|
| 409 |
'gpa': {'district': None, 'state': None},
|
| 410 |
'credits': {},
|
| 411 |
'class_rank': {'percentile': None, 'class_size': None}
|
| 412 |
}
|
| 413 |
|
| 414 |
-
#
|
| 415 |
gpa_match = self.patterns['gpa'].search(text)
|
| 416 |
-
if not gpa_match:
|
| 417 |
-
gpa_match = re.search(r"GPA.*?([\d.]+).*?([\d.]+)", text)
|
| 418 |
-
|
| 419 |
if gpa_match:
|
| 420 |
summary['gpa']['district'] = float(gpa_match.group(1))
|
| 421 |
summary['gpa']['state'] = float(gpa_match.group(2)) if gpa_match.group(2) else summary['gpa']['district']
|
| 422 |
|
| 423 |
-
#
|
| 424 |
credits_matches = self.patterns['credits'].finditer(text)
|
| 425 |
-
if not credits_matches:
|
| 426 |
-
credits_matches = re.finditer(r"([A-Z ]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)", text)
|
| 427 |
-
|
| 428 |
for match in credits_matches:
|
| 429 |
subject = match.group(1).strip()
|
| 430 |
summary['credits'][subject] = {
|
|
@@ -433,11 +403,8 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 433 |
'remaining': float(match.group(4)) if match.group(4) else None
|
| 434 |
}
|
| 435 |
|
| 436 |
-
#
|
| 437 |
rank_match = self.patterns['class_rank'].search(text)
|
| 438 |
-
if not rank_match:
|
| 439 |
-
rank_match = re.search(r"RANK.*?(\d+).*?(\d+)", text)
|
| 440 |
-
|
| 441 |
if rank_match:
|
| 442 |
summary['class_rank']['percentile'] = int(rank_match.group(1))
|
| 443 |
summary['class_rank']['class_size'] = int(rank_match.group(2))
|
|
@@ -445,48 +412,40 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 445 |
return summary
|
| 446 |
|
| 447 |
def _parse_courses(self, text: str) -> List[Dict]:
|
| 448 |
-
"""Parse course history section
|
| 449 |
courses = []
|
| 450 |
|
| 451 |
# Try primary pattern first
|
| 452 |
for match in self.patterns['course'].finditer(text):
|
| 453 |
-
courses.append(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
|
| 455 |
-
# If no courses found, try alternative
|
| 456 |
if not courses:
|
| 457 |
-
|
| 458 |
-
r"(\d{4}-\d{4})\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([\d.]+)",
|
| 459 |
-
re.DOTALL
|
| 460 |
-
)
|
| 461 |
-
for match in alt_pattern.finditer(text):
|
| 462 |
courses.append({
|
| 463 |
'term': match.group(1),
|
| 464 |
'course_code': match.group(2),
|
| 465 |
'course_title': match.group(3).strip(),
|
| 466 |
'subject_area': match.group(4),
|
| 467 |
'grade': match.group(5),
|
| 468 |
-
'
|
| 469 |
-
'
|
| 470 |
})
|
| 471 |
|
| 472 |
return courses
|
| 473 |
|
| 474 |
-
def _create_course_dict(self, match) -> Dict:
    """Turn a nine-group course regex match into a course record.

    Args:
        match: A regex match object exposing groups 1 through 9.

    Returns:
        A dict holding groups 1-7 as strings (the title with surrounding
        whitespace removed) and groups 8-9 converted to ``float``.
    """
    group = match.group

    record = {}
    record['term'] = group(1)
    record['course_code'] = group(2)
    record['course_title'] = group(3).strip()
    record['subject_area'] = group(4)
    record['grade'] = group(5)
    record['flag'] = group(6)
    record['credit_status'] = group(7)
    # The last two groups are numeric credit columns.
    record['credit_attempted'] = float(group(8))
    record['credit_earned'] = float(group(9))
    return record
|
| 487 |
-
|
| 488 |
def _parse_assessments(self, text: str) -> Dict:
|
| 489 |
-
"""Parse assessment and requirement information
|
| 490 |
assessments = {
|
| 491 |
'ela_passed_date': None,
|
| 492 |
'algebra_passed': False,
|
|
@@ -497,11 +456,7 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 497 |
}
|
| 498 |
}
|
| 499 |
|
| 500 |
-
# Try multiple assessment patterns
|
| 501 |
matches = self.patterns['assessment'].finditer(text)
|
| 502 |
-
if not matches:
|
| 503 |
-
matches = re.finditer(r"(ENGLISH|ALGEBRA|BIOLOGY|SERVICE).*?(PASSED|MET|YES|NO|\d{2}/\d{4})", text)
|
| 504 |
-
|
| 505 |
for match in matches:
|
| 506 |
if match.group(1): # ELA date
|
| 507 |
assessments['ela_passed_date'] = match.group(1)
|
|
@@ -517,8 +472,8 @@ class EnhancedMiamiDadeTranscriptParser:
|
|
| 517 |
|
| 518 |
return assessments
|
| 519 |
|
| 520 |
-
# Initialize the
|
| 521 |
-
transcript_parser =
|
| 522 |
|
| 523 |
class AcademicAnalyzer:
|
| 524 |
def __init__(self):
|
|
@@ -547,7 +502,7 @@ class AcademicAnalyzer:
|
|
| 547 |
if parsed_data.get('format') == 'progress_summary':
|
| 548 |
weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
|
| 549 |
unweighted_gpa = float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0))
|
| 550 |
-
elif parsed_data.get('format') == '
|
| 551 |
weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('district', 0))
|
| 552 |
unweighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('state', 0))
|
| 553 |
else: # Alternative format
|
|
@@ -681,7 +636,7 @@ class AcademicAnalyzer:
|
|
| 681 |
'remaining': max(0, info.get('required', 0) - info.get('earned', 0))
|
| 682 |
}
|
| 683 |
for subject, info in credits.items()
|
| 684 |
-
if info and info.get('required', 0) > info.get('earned', 0)
|
| 685 |
]
|
| 686 |
|
| 687 |
current_grade = parsed_data.get('student_info', {}).get('grade', '')
|
|
@@ -2584,4 +2539,5 @@ def create_enhanced_interface():
|
|
| 2584 |
app = create_enhanced_interface()
|
| 2585 |
|
| 2586 |
if __name__ == "__main__":
|
| 2587 |
-
app.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
| 155 |
"When learning a new skill, I prefer to:",
|
| 156 |
"When studying, I like to:",
|
| 157 |
"I prefer teachers who:",
|
| 158 |
+
"When solving problems, I:",
|
| 159 |
+
"When working on a group project, I:",
|
| 160 |
+
"My ideal study environment is:",
|
| 161 |
+
"When preparing for a test, I:",
|
| 162 |
+
"When reading instructions, I:",
|
| 163 |
+
"When explaining something to someone, I:",
|
| 164 |
+
"When taking notes in class, I:",
|
| 165 |
+
"When using a new device or app, I:",
|
| 166 |
+
"When remembering names, I:",
|
| 167 |
+
"When choosing a book to read, I:",
|
| 168 |
+
"When giving a presentation, I:",
|
| 169 |
+
"When organizing my work, I:",
|
| 170 |
+
"When relaxing, I enjoy:"
|
| 171 |
]
|
| 172 |
|
| 173 |
self.options = [
|
|
|
|
| 178 |
["Watch demonstrations", "Listen to instructions", "Read instructions", "Jump in and try it"],
|
| 179 |
["Use highlighters and diagrams", "Discuss with others", "Read and take notes", "Move around or use objects"],
|
| 180 |
["Use visual aids", "Give interesting lectures", "Provide reading materials", "Include hands-on activities"],
|
| 181 |
+
["Draw pictures or diagrams", "Talk through options", "Make lists", "Try different solutions physically"],
|
| 182 |
+
["Create visual plans", "Discuss ideas verbally", "Write detailed plans", "Take on hands-on tasks"],
|
| 183 |
+
["Somewhere quiet with good lighting", "Somewhere I can discuss ideas", "A library with lots of resources", "Somewhere I can move around"],
|
| 184 |
+
["Create visual study aids", "Recite information aloud", "Write summaries", "Create physical models"],
|
| 185 |
+
["Look at diagrams first", "Have someone explain them", "Read them carefully", "Try to follow them as I go"],
|
| 186 |
+
["Draw diagrams or pictures", "Explain verbally", "Write detailed explanations", "Show by doing"],
|
| 187 |
+
["Draw diagrams and symbols", "Record lectures to listen later", "Write detailed notes", "Underline and highlight"],
|
| 188 |
+
["Look at the screen layout", "Listen to audio instructions", "Read the manual", "Start clicking buttons"],
|
| 189 |
+
["Remember faces better than names", "Remember names when I hear them", "Remember names when I see them written", "Remember people by activities we did"],
|
| 190 |
+
["Choose books with pictures/diagrams", "Choose audiobooks", "Choose text-heavy books", "Choose interactive books"],
|
| 191 |
+
["Use lots of visual aids", "Focus on my verbal delivery", "Provide handouts", "Use props or demonstrations"],
|
| 192 |
+
["Use color-coding systems", "Talk through my plan", "Make detailed lists", "Physically arrange materials"],
|
| 193 |
+
["Watching videos or art", "Listening to music/podcasts", "Reading", "Doing physical activities"]
|
| 194 |
]
|
| 195 |
|
| 196 |
self.learning_styles = {
|
|
|
|
| 274 |
# Initialize learning style quiz
|
| 275 |
learning_style_quiz = LearningStyleQuiz()
|
| 276 |
|
| 277 |
+
class MiamiDadeTranscriptParser:
|
| 278 |
def __init__(self):
|
| 279 |
self.patterns = {
|
| 280 |
'student_info': re.compile(
|
| 281 |
+
r"LEGAL NAME:\s*([^\n]+?)\s*MAILING\s+ADDRESS:.*?"
|
| 282 |
r"GRADE LEVEL:\s*(\d+).*?"
|
| 283 |
r"FL STUDENT ID:\s*(\w+).*?"
|
| 284 |
+
r"CURRENT SCHOOL:\s*(\d+\s+[^\n]+?)\s*\(",
|
| 285 |
re.DOTALL
|
| 286 |
),
|
| 287 |
'gpa': re.compile(
|
|
|
|
| 293 |
re.DOTALL
|
| 294 |
),
|
| 295 |
'course': re.compile(
|
| 296 |
+
r"(\d)\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([A-Z])\s+([\d.]+)\s+([\d.]+)",
|
| 297 |
re.DOTALL
|
| 298 |
),
|
| 299 |
'assessment': re.compile(
|
|
|
|
| 306 |
'class_rank': re.compile(
|
| 307 |
r"\*\s+PERCENTILE:\s*(\d+)\s*\*\s*TOTAL NUMBER IN CLASS:\s*(\d+)",
|
| 308 |
re.DOTALL
|
| 309 |
+
),
|
| 310 |
+
'course_alt': re.compile(
|
| 311 |
+
r"(\d)\s+(\w+)\s+([^\n]+?)\s+([A-Z]{2})\s+([A-Z])\s+([A-Z])\s+([\d.]+)\s+([\d.]+)",
|
| 312 |
+
re.DOTALL
|
| 313 |
)
|
| 314 |
}
|
| 315 |
|
| 316 |
def parse_transcript(self, file_path: str) -> Dict:
    """Extract the text of a transcript PDF and parse it.

    Text is read with pdfplumber first. When that yields fewer than 500
    characters, the file is re-read with PyMuPDF (``fitz``) and that text is
    used instead. The resulting text is handed to
    ``_parse_miami_dade_format``.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        The dictionary produced by ``_parse_miami_dade_format``.

    Raises:
        ValueError: If extraction or parsing fails for any reason. The
            failure is logged and the original exception is attached as
            ``__cause__``.
    """
    try:
        # First try pdfplumber
        page_texts = []
        with pdfplumber.open(file_path) as pdf:
            for page in pdf.pages:
                # NOTE(review): extract_text() may return None for a page
                # with no text layer (older pdfplumber releases do); treat
                # that as empty text instead of failing on None + str.
                page_texts.append((page.extract_text() or "") + "\n")
        text = "".join(page_texts)

        # Fallback to PyMuPDF if text extraction is poor
        if len(text) < 500:
            doc = fitz.open(file_path)
            try:
                text = "".join(page.get_text() for page in doc)
            finally:
                # Release the file handle even if a page fails to extract.
                doc.close()

        return self._parse_miami_dade_format(text)
    except Exception as e:
        logger.error(f"Error parsing transcript: {str(e)}")
        raise ValueError(f"Error processing transcript: {str(e)}") from e
|
| 336 |
|
| 337 |
+
def _parse_miami_dade_format(self, text: str) -> Dict:
    """Run every section parser over the text and bundle the results.

    Args:
        text: Transcript text to parse.

    Returns:
        A dict with the keys ``student_info``, ``academic_summary``,
        ``course_history``, ``assessments`` and ``format`` (always
        ``'miami_dade_v3'``).

    Raises:
        ValueError: If the student info or the course history comes back
            empty.
    """
    student_info = self._parse_student_info(text)
    academic_summary = self._parse_academic_summary(text)
    course_history = self._parse_courses(text)
    assessments = self._parse_assessments(text)

    # Both the student header and at least one course are required.
    if not (student_info and course_history):
        raise ValueError("Incomplete data extracted from transcript")

    return {
        'student_info': student_info,
        'academic_summary': academic_summary,
        'course_history': course_history,
        'assessments': assessments,
        'format': 'miami_dade_v3',
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
def _parse_student_info(self, text: str) -> Dict:
|
| 354 |
"""Extract student information with improved pattern matching"""
|
| 355 |
match = self.patterns['student_info'].search(text)
|
| 356 |
if not match:
|
| 357 |
+
return {}
|
| 358 |
+
|
|
|
|
|
|
|
|
|
|
| 359 |
return {
|
| 360 |
+
'name': match.group(1).strip(),
|
| 361 |
'grade': match.group(2) if match and len(match.groups()) > 1 else "Unknown",
|
| 362 |
'student_id': match.group(3) if match and len(match.groups()) > 2 else "Unknown",
|
| 363 |
'school': match.group(4).strip() if match and len(match.groups()) > 3 else "Unknown",
|
|
|
|
| 366 |
}
|
| 367 |
|
| 368 |
def _extract_birth_date(self, text: str) -> Optional[str]:
    """Find the date that follows a ``BIRTH DATE:`` label.

    Args:
        text: Transcript text to search.

    Returns:
        The first ``NN/NN/NNNN`` digit sequence found after the label, or
        ``None`` when the label with such a date is not present.
    """
    found = re.search(r"BIRTH DATE:\s*(\d{2}/\d{2}/\d{4})", text)
    return found.group(1) if found else None
|
| 374 |
|
| 375 |
def _extract_ethnicity(self, text: str) -> Optional[str]:
    """Find the value that follows an ``ETHNICITY:`` label.

    Args:
        text: Transcript text to search.

    Returns:
        The rest of the line after the label (and any whitespace following
        it), with surrounding whitespace removed, or ``None`` when the label
        is not found.
    """
    found = re.search(r"ETHNICITY:\s*([^\n]+)", text)
    if found is None:
        return None
    value = found.group(1)
    return value.strip()
|
| 381 |
|
| 382 |
def _parse_academic_summary(self, text: str) -> Dict:
|
| 383 |
+
"""Parse academic summary section"""
|
| 384 |
summary = {
|
| 385 |
'gpa': {'district': None, 'state': None},
|
| 386 |
'credits': {},
|
| 387 |
'class_rank': {'percentile': None, 'class_size': None}
|
| 388 |
}
|
| 389 |
|
| 390 |
+
# GPA
|
| 391 |
gpa_match = self.patterns['gpa'].search(text)
|
|
|
|
|
|
|
|
|
|
| 392 |
if gpa_match:
|
| 393 |
summary['gpa']['district'] = float(gpa_match.group(1))
|
| 394 |
summary['gpa']['state'] = float(gpa_match.group(2)) if gpa_match.group(2) else summary['gpa']['district']
|
| 395 |
|
| 396 |
+
# Credits
|
| 397 |
credits_matches = self.patterns['credits'].finditer(text)
|
|
|
|
|
|
|
|
|
|
| 398 |
for match in credits_matches:
|
| 399 |
subject = match.group(1).strip()
|
| 400 |
summary['credits'][subject] = {
|
|
|
|
| 403 |
'remaining': float(match.group(4)) if match.group(4) else None
|
| 404 |
}
|
| 405 |
|
| 406 |
+
# Class Rank
|
| 407 |
rank_match = self.patterns['class_rank'].search(text)
|
|
|
|
|
|
|
|
|
|
| 408 |
if rank_match:
|
| 409 |
summary['class_rank']['percentile'] = int(rank_match.group(1))
|
| 410 |
summary['class_rank']['class_size'] = int(rank_match.group(2))
|
|
|
|
| 412 |
return summary
|
| 413 |
|
| 414 |
def _parse_courses(self, text: str) -> List[Dict]:
    """Parse course rows out of the transcript text.

    Rows are matched with ``self.patterns['course']`` first. Only when that
    finds nothing is ``self.patterns['course_alt']`` tried.

    Args:
        text: Transcript text to search.

    Returns:
        One dict per matched row. Rows from the primary pattern carry
        ``term``, ``course_code``, ``course_title``, ``subject_area``,
        ``grade``, ``flag``, ``credit_status``, ``credit_attempted`` and
        ``credit_earned``. Rows from the alternative pattern carry the same
        keys except ``flag`` and ``credit_status``. The list is empty when
        neither pattern matches.
    """
    courses = []

    # Try primary pattern first
    for match in self.patterns['course'].finditer(text):
        courses.append({
            'term': match.group(1),
            'course_code': match.group(2),
            'course_title': match.group(3).strip(),
            'subject_area': match.group(4),
            'grade': match.group(5),
            'flag': match.group(6),
            'credit_status': match.group(7),
            'credit_attempted': float(match.group(8)),
            'credit_earned': float(match.group(9)),
        })

    # If no courses found, try alternative pattern
    if not courses:
        for match in self.patterns['course_alt'].finditer(text):
            courses.append({
                'term': match.group(1),
                'course_code': match.group(2),
                'course_title': match.group(3).strip(),
                'subject_area': match.group(4),
                'grade': match.group(5),
                # 'course_alt' has eight groups: group 6 is a single
                # letter column (not stored here) and the two numeric
                # credit columns are groups 7 and 8. Reading groups 6/7
                # would pass a letter to float() and fail on every row.
                'credit_attempted': float(match.group(7)),
                'credit_earned': float(match.group(8)),
            })

    return courses
|
| 446 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 447 |
def _parse_assessments(self, text: str) -> Dict:
|
| 448 |
+
"""Parse assessment and requirement information"""
|
| 449 |
assessments = {
|
| 450 |
'ela_passed_date': None,
|
| 451 |
'algebra_passed': False,
|
|
|
|
| 456 |
}
|
| 457 |
}
|
| 458 |
|
|
|
|
| 459 |
matches = self.patterns['assessment'].finditer(text)
|
|
|
|
|
|
|
|
|
|
| 460 |
for match in matches:
|
| 461 |
if match.group(1): # ELA date
|
| 462 |
assessments['ela_passed_date'] = match.group(1)
|
|
|
|
| 472 |
|
| 473 |
return assessments
|
| 474 |
|
| 475 |
+
# Initialize the parser
|
| 476 |
+
transcript_parser = MiamiDadeTranscriptParser()
|
| 477 |
|
| 478 |
class AcademicAnalyzer:
|
| 479 |
def __init__(self):
|
|
|
|
| 502 |
if parsed_data.get('format') == 'progress_summary':
|
| 503 |
weighted_gpa = float(parsed_data.get('student_info', {}).get('weighted_gpa', 0))
|
| 504 |
unweighted_gpa = float(parsed_data.get('student_info', {}).get('unweighted_gpa', 0))
|
| 505 |
+
elif parsed_data.get('format') == 'miami_dade_v3':
|
| 506 |
weighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('district', 0))
|
| 507 |
unweighted_gpa = float(parsed_data.get('academic_summary', {}).get('gpa', {}).get('state', 0))
|
| 508 |
else: # Alternative format
|
|
|
|
| 636 |
'remaining': max(0, info.get('required', 0) - info.get('earned', 0))
|
| 637 |
}
|
| 638 |
for subject, info in credits.items()
|
| 639 |
+
if info and info.get('required', 0) > info.get('earned', 0))
|
| 640 |
]
|
| 641 |
|
| 642 |
current_grade = parsed_data.get('student_info', {}).get('grade', '')
|
|
|
|
| 2539 |
app = create_enhanced_interface()
|
| 2540 |
|
| 2541 |
if __name__ == "__main__":
|
| 2542 |
+
app.launch(server_name="0.0.0.0", server_port=7860)
|
| 2543 |
+
|