Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -272,115 +272,184 @@ class TranscriptParser:
|
|
| 272 |
self.requirements = {}
|
| 273 |
self.current_courses = []
|
| 274 |
self.course_history = []
|
|
|
|
| 275 |
|
| 276 |
def parse_transcript(self, text: str) -> Dict:
|
| 277 |
-
"""
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
def _extract_student_info(self, text: str):
|
| 292 |
-
"""Enhanced student info extraction
|
| 293 |
-
#
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
r"(\d{7})\s*[-]?\s*([\w\s,]+?)\s*"
|
| 297 |
-
r"(?:\||Cohort\s*\w+\s*\||Un-weighted\s*GPA\s*([\d.]+)\s*\||Comm\s*Serv\s*Hours\s*(\d+))?"
|
| 298 |
-
)
|
| 299 |
|
| 300 |
-
|
| 301 |
-
if header_match:
|
| 302 |
self.student_data = {
|
| 303 |
-
"id":
|
| 304 |
-
"name":
|
| 305 |
-
"
|
| 306 |
-
"
|
| 307 |
}
|
| 308 |
|
| 309 |
-
#
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
r"(?:\||YOG\s*[:]?\s*(\d{4})\s*\||Weighted\s*GPA\s*([\d.]+)\s*\||Total\s*Credits\s*Earned\s*([\d.]+))?"
|
| 313 |
-
)
|
| 314 |
|
| 315 |
-
|
| 316 |
-
if grade_match:
|
| 317 |
self.student_data.update({
|
| 318 |
-
"
|
| 319 |
-
"
|
| 320 |
-
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
})
|
| 323 |
|
| 324 |
def _extract_requirements(self, text: str):
|
| 325 |
-
"""Parse the graduation requirements section"""
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
)
|
| 330 |
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
}
|
| 338 |
|
| 339 |
def _extract_course_history(self, text: str):
|
| 340 |
-
"""Parse the detailed course history"""
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
)
|
| 345 |
|
| 346 |
-
|
|
|
|
|
|
|
| 347 |
self.course_history.append({
|
| 348 |
-
"requirement_category":
|
| 349 |
-
"school_year":
|
| 350 |
-
"grade_level":
|
| 351 |
-
"course_code":
|
| 352 |
-
"description":
|
| 353 |
-
"term":
|
| 354 |
-
"district_number":
|
| 355 |
-
"grade":
|
| 356 |
-
"inclusion_status":
|
| 357 |
-
"credits":
|
| 358 |
})
|
| 359 |
|
| 360 |
def _extract_current_courses(self, text: str):
|
| 361 |
"""Identify courses currently in progress"""
|
| 362 |
-
in_progress = [c for c in self.course_history if "inProgress" in c["credits"]]
|
| 363 |
self.current_courses = [
|
| 364 |
{
|
| 365 |
"course": c["description"],
|
|
|
|
| 366 |
"category": c["requirement_category"],
|
| 367 |
"term": c["term"],
|
| 368 |
-
"credits": c["credits"]
|
|
|
|
| 369 |
}
|
| 370 |
-
for c in
|
|
|
|
| 371 |
]
|
| 372 |
|
| 373 |
-
def _calculate_completion(self)
|
| 374 |
-
"""Calculate overall completion status"""
|
| 375 |
total_required = sum(req["required"] for req in self.requirements.values())
|
| 376 |
total_completed = sum(req["completed"] for req in self.requirements.values())
|
| 377 |
|
| 378 |
-
|
| 379 |
-
"
|
| 380 |
-
"
|
| 381 |
"percent_complete": round((total_completed / total_required) * 100, 1),
|
| 382 |
-
"remaining_credits": total_required - total_completed
|
| 383 |
-
|
|
|
|
| 384 |
|
| 385 |
def to_json(self) -> str:
|
| 386 |
"""Export parsed data as JSON"""
|
|
@@ -389,51 +458,92 @@ class TranscriptParser:
|
|
| 389 |
"requirements": self.requirements,
|
| 390 |
"current_courses": self.current_courses,
|
| 391 |
"course_history": self.course_history,
|
| 392 |
-
"
|
| 393 |
}, indent=2)
|
| 394 |
|
| 395 |
-
def
|
| 396 |
-
"""
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
|
| 401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
try:
|
|
|
|
| 403 |
if progress:
|
| 404 |
-
progress(0.1, desc="
|
|
|
|
| 405 |
parser = TranscriptParser()
|
| 406 |
parsed_data = parser.parse_transcript(text)
|
|
|
|
| 407 |
if progress:
|
| 408 |
-
progress(0.
|
| 409 |
-
|
| 410 |
-
# Convert to expected format
|
| 411 |
-
formatted_data = {
|
| 412 |
-
"grade_level": parsed_data["student_info"].get("current_grade", "Unknown"),
|
| 413 |
-
"gpa": {
|
| 414 |
-
"weighted": parsed_data["student_info"].get("weighted_gpa", "N/A"),
|
| 415 |
-
"unweighted": parsed_data["student_info"].get("unweighted_gpa", "N/A")
|
| 416 |
-
},
|
| 417 |
-
"courses": []
|
| 418 |
-
}
|
| 419 |
|
| 420 |
-
|
| 421 |
-
for course in parsed_data["course_history"]:
|
| 422 |
-
formatted_data["courses"].append({
|
| 423 |
-
"code": course["course_code"],
|
| 424 |
-
"name": course["description"],
|
| 425 |
-
"grade": course["grade"],
|
| 426 |
-
"credits": course["credits"],
|
| 427 |
-
"year": course["school_year"],
|
| 428 |
-
"grade_level": course["grade_level"]
|
| 429 |
-
})
|
| 430 |
|
| 431 |
-
if progress:
|
| 432 |
-
progress(1.0)
|
| 433 |
-
return formatted_data
|
| 434 |
-
|
| 435 |
except Exception as e:
|
| 436 |
logging.warning(f"Structured parsing failed, falling back to AI: {str(e)}")
|
|
|
|
| 437 |
# Fall back to AI parsing if structured parsing fails
|
| 438 |
return parse_transcript_with_ai_fallback(text, progress)
|
| 439 |
|
|
@@ -447,6 +557,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
| 447 |
- Current grade level
|
| 448 |
- Weighted GPA (if available)
|
| 449 |
- Unweighted GPA (if available)
|
|
|
|
|
|
|
| 450 |
- List of all courses with:
|
| 451 |
* Course code
|
| 452 |
* Course name
|
|
@@ -454,6 +566,7 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
| 454 |
* Credits earned
|
| 455 |
* Year/semester taken
|
| 456 |
* Grade level when taken
|
|
|
|
| 457 |
Return the data in JSON format.
|
| 458 |
|
| 459 |
Transcript Text:
|
|
@@ -464,6 +577,10 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
| 464 |
if progress:
|
| 465 |
progress(0.1, desc="Processing transcript with AI...")
|
| 466 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
# Tokenize and generate response
|
| 468 |
inputs = tokenizer(prompt, return_tensors="pt").to(model_loader.device)
|
| 469 |
if progress:
|
|
@@ -471,7 +588,7 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
| 471 |
|
| 472 |
outputs = model.generate(
|
| 473 |
**inputs,
|
| 474 |
-
max_new_tokens=
|
| 475 |
temperature=0.1,
|
| 476 |
do_sample=True
|
| 477 |
)
|
|
@@ -500,43 +617,8 @@ def parse_transcript_with_ai_fallback(text: str, progress=gr.Progress()) -> Dict
|
|
| 500 |
logging.error(f"AI parsing error: {str(e)}")
|
| 501 |
raise gr.Error(f"Error processing transcript: {str(e)}")
|
| 502 |
|
| 503 |
-
def format_transcript_output(data: Dict) -> str:
    """Format the parsed data into human-readable text.

    Args:
        data: Parsed transcript with optional ``grade_level``, ``gpa``
            (dict with ``weighted`` / ``unweighted``) and ``courses``
            (list of course dicts) entries.

    Returns:
        A newline-joined summary: header, GPA section (when present) and the
        course history grouped by grade level.
    """

    def _grade_sort_key(level):
        # Numeric grade levels sort first in numeric order; everything else
        # (e.g. 'Unknown') sorts afterwards alphabetically. Returning a tuple
        # keeps ints and strings from ever being compared with each other.
        try:
            return (0, int(level), "")
        except (TypeError, ValueError):
            return (1, 0, str(level))

    output = []
    output.append(f"Student Transcript Summary\n{'='*40}")
    output.append(f"Current Grade Level: {data.get('grade_level', 'Unknown')}")

    if 'gpa' in data:
        output.append("\nGPA:")
        output.append(f"- Weighted: {data['gpa'].get('weighted', 'N/A')}")
        output.append(f"- Unweighted: {data['gpa'].get('unweighted', 'N/A')}")

    if 'courses' in data:
        output.append("\nCourse History:\n" + '='*40)

        # Group courses by grade level
        courses_by_grade = defaultdict(list)
        for course in data['courses']:
            grade_level = course.get('grade_level', 'Unknown')
            courses_by_grade[grade_level].append(course)

        # Sort grades numerically, non-numeric labels last
        for grade in sorted(courses_by_grade, key=_grade_sort_key):
            output.append(f"\nGrade {grade}:\n{'-'*30}")
            for course in courses_by_grade[grade]:
                course_str = f"- {course.get('code', '')} {course.get('name', 'Unnamed course')}"
                if 'grade' in course:
                    course_str += f" (Grade: {course['grade']})"
                if 'credits' in course:
                    course_str += f" | Credits: {course['credits']}"
                if 'year' in course:
                    course_str += f" | Year: {course['year']}"
                output.append(course_str)

    return '\n'.join(output)
|
| 537 |
-
|
| 538 |
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
|
| 539 |
-
"""Main function to parse transcript files
|
| 540 |
try:
|
| 541 |
if not file_obj:
|
| 542 |
raise ValueError("Please upload a file first")
|
|
@@ -544,32 +626,40 @@ def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Di
|
|
| 544 |
validate_file(file_obj)
|
| 545 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
| 546 |
|
| 547 |
-
# Extract text from file
|
|
|
|
|
|
|
|
|
|
| 548 |
text = extract_text_from_file(file_obj.name, file_ext)
|
| 549 |
|
| 550 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
parsed_data = parse_transcript_with_ai(text, progress)
|
| 552 |
|
| 553 |
# Format output text
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
# Prepare the data structure for saving
|
| 557 |
-
transcript_data = {
|
| 558 |
-
"grade_level": parsed_data.get('grade_level', 'Unknown'),
|
| 559 |
-
"gpa": parsed_data.get('gpa', {}),
|
| 560 |
-
"courses": defaultdict(list)
|
| 561 |
-
}
|
| 562 |
|
| 563 |
-
|
| 564 |
-
for course in parsed_data.get('courses', []):
|
| 565 |
-
grade_level = course.get('grade_level', 'Unknown')
|
| 566 |
-
transcript_data["courses"][grade_level].append(course)
|
| 567 |
|
| 568 |
-
return output_text,
|
| 569 |
|
| 570 |
except Exception as e:
|
| 571 |
-
|
| 572 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
|
| 574 |
# ========== LEARNING STYLE QUIZ ==========
|
| 575 |
class LearningStyleQuiz:
|
|
@@ -1449,7 +1539,7 @@ def create_interface():
|
|
| 1449 |
"Your profile summary will appear here after saving.",
|
| 1450 |
label="Profile Summary"
|
| 1451 |
)
|
| 1452 |
-
blog = gr.Textbox(label="Personal Blog", visible=False)
|
| 1453 |
|
| 1454 |
def save_profile_and_update(name, age, interests, transcript_data, learning_style,
|
| 1455 |
movie, movie_reason, show, show_reason,
|
|
@@ -1606,3 +1696,4 @@ app = create_interface()
|
|
| 1606 |
|
| 1607 |
if __name__ == "__main__":
|
| 1608 |
app.launch()
|
|
|
|
|
|
| 272 |
self.requirements = {}
|
| 273 |
self.current_courses = []
|
| 274 |
self.course_history = []
|
| 275 |
+
self.graduation_status = {}
|
| 276 |
|
| 277 |
def parse_transcript(self, text: str) -> Dict:
    """Parse raw transcript text into structured data (Miami-Dade format).

    Whitespace is collapsed to single spaces first, then the student header,
    requirements table and course history are extracted, in-progress courses
    are derived and the completion summary is calculated.

    Args:
        text: Raw text extracted from the transcript file.

    Returns:
        Dict with the keys ``student_info``, ``requirements``,
        ``current_courses``, ``course_history`` and ``graduation_status``.

    Raises:
        gr.Error: If any extraction step fails; the original exception is
            attached as ``__cause__``.
    """
    # Start every parse from a clean slate: the extraction helpers append to /
    # update these containers, so re-using a parser instance would otherwise
    # duplicate course rows and keep stale values from the previous document.
    # NOTE(review): assumes __init__ initialises student_data as an empty dict
    # (that line is outside this view) - confirm.
    self.student_data = {}
    self.requirements = {}
    self.current_courses = []
    self.course_history = []
    self.graduation_status = {}

    try:
        # First normalize the text (replace multiple spaces, normalize line breaks)
        normalized = re.sub(r'\s+', ' ', text)

        # Order matters: current courses are derived from the course history,
        # and the completion summary from the requirements table.
        self._extract_student_info(normalized)
        self._extract_requirements(normalized)
        self._extract_course_history(normalized)
        self._extract_current_courses(normalized)
        self._calculate_completion()

        return {
            "student_info": self.student_data,
            "requirements": self.requirements,
            "current_courses": self.current_courses,
            "course_history": self.course_history,
            "graduation_status": self.graduation_status
        }

    except Exception as e:
        # logging.exception records the traceback alongside the message.
        logging.exception(f"Error parsing transcript: {str(e)}")
        raise gr.Error(f"Error parsing transcript: {str(e)}") from e
|
| 309 |
|
| 310 |
def _extract_student_info(self, text: str):
|
| 311 |
+
"""Enhanced student info extraction for Miami-Dade format"""
|
| 312 |
+
# Extract basic student info
|
| 313 |
+
student_pattern = r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+)\s*Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
|
| 314 |
+
student_match = re.search(student_pattern, text, re.IGNORECASE)
|
|
|
|
|
|
|
|
|
|
| 315 |
|
| 316 |
+
if student_match:
|
|
|
|
| 317 |
self.student_data = {
|
| 318 |
+
"id": student_match.group(1),
|
| 319 |
+
"name": student_match.group(2).replace(",", ", "),
|
| 320 |
+
"current_grade": student_match.group(3),
|
| 321 |
+
"graduation_year": student_match.group(4)
|
| 322 |
}
|
| 323 |
|
| 324 |
+
# Extract GPA info
|
| 325 |
+
gpa_pattern = r"Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
|
| 326 |
+
gpa_match = re.search(gpa_pattern, text, re.IGNORECASE)
|
|
|
|
|
|
|
| 327 |
|
| 328 |
+
if gpa_match:
|
|
|
|
| 329 |
self.student_data.update({
|
| 330 |
+
"unweighted_gpa": float(gpa_match.group(1)),
|
| 331 |
+
"weighted_gpa": float(gpa_match.group(2))
|
| 332 |
+
})
|
| 333 |
+
|
| 334 |
+
# Extract credits and service hours
|
| 335 |
+
credits_pattern = r"Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)"
|
| 336 |
+
credits_match = re.search(credits_pattern, text, re.IGNORECASE)
|
| 337 |
+
|
| 338 |
+
if credits_match:
|
| 339 |
+
self.student_data.update({
|
| 340 |
+
"total_credits": float(credits_match.group(1)),
|
| 341 |
+
"community_service_hours": int(credits_match.group(2))
|
| 342 |
})
|
| 343 |
|
| 344 |
def _extract_requirements(self, text: str):
|
| 345 |
+
"""Parse the graduation requirements section with improved table parsing"""
|
| 346 |
+
# Find the requirements table
|
| 347 |
+
req_table_start = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status", text)
|
| 348 |
+
if not req_table_start:
|
| 349 |
+
raise ValueError("Could not find requirements table header")
|
| 350 |
+
|
| 351 |
+
req_text = text[req_table_start.start():]
|
| 352 |
+
|
| 353 |
+
# Extract individual requirements
|
| 354 |
+
req_pattern = (
|
| 355 |
+
r"([A-Z]-[\w\s\(\)&]+)\s*" # Code
|
| 356 |
+
r"([^\|]+)\s*" # Description
|
| 357 |
+
r"([\d.]+)\s*" # Required
|
| 358 |
+
r"([\d.]+)\s*" # Waived
|
| 359 |
+
r"([\d.]+)\s*" # Completed
|
| 360 |
+
r"([\d.]+)\s*%" # Status
|
| 361 |
)
|
| 362 |
|
| 363 |
+
req_matches = re.finditer(req_pattern, req_text)
|
| 364 |
+
|
| 365 |
+
for match in req_matches:
|
| 366 |
+
req_code = match.group(1).strip()
|
| 367 |
+
self.requirements[req_code] = {
|
| 368 |
+
"description": match.group(2).strip(),
|
| 369 |
+
"required": float(match.group(3)),
|
| 370 |
+
"waived": float(match.group(4)),
|
| 371 |
+
"completed": float(match.group(5)),
|
| 372 |
+
"status": f"{match.group(6)}%"
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
# Extract total requirements
|
| 376 |
+
total_pattern = r"Total\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)%"
|
| 377 |
+
total_match = re.search(total_pattern, req_text)
|
| 378 |
+
|
| 379 |
+
if total_match:
|
| 380 |
+
self.graduation_status["total_requirements"] = {
|
| 381 |
+
"required": float(total_match.group(1)),
|
| 382 |
+
"waived": float(total_match.group(2)),
|
| 383 |
+
"completed": float(total_match.group(3)),
|
| 384 |
+
"percent_complete": float(total_match.group(4))
|
| 385 |
}
|
| 386 |
|
| 387 |
def _extract_course_history(self, text: str):
|
| 388 |
+
"""Parse the detailed course history with improved pattern matching"""
|
| 389 |
+
# Find the course history table
|
| 390 |
+
course_header = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description\s*Term\s*DstNumber\s*FG\s*Incl\s*Credits", text)
|
| 391 |
+
if not course_header:
|
| 392 |
+
raise ValueError("Could not find course history table header")
|
| 393 |
+
|
| 394 |
+
course_text = text[course_header.start():]
|
| 395 |
+
|
| 396 |
+
# Extract individual courses
|
| 397 |
+
course_pattern = (
|
| 398 |
+
r"([A-Z]-[\w\s\(\)&-]+)\s*" # Requirement
|
| 399 |
+
r"(\d{4}-\d{4})\s*" # School Year
|
| 400 |
+
r"(\d{2})\s*" # Grade Level
|
| 401 |
+
r"([A-Z0-9]+)\s*" # Course Number
|
| 402 |
+
r"([^\|]+)\s*" # Description
|
| 403 |
+
r"([A-Z0-9]+)\s*" # Term
|
| 404 |
+
r"([A-Z0-9]+)\s*" # District Number
|
| 405 |
+
r"([A-Z])\s*" # Final Grade
|
| 406 |
+
r"([A-Z])\s*" # Inclusion Status
|
| 407 |
+
r"([\d.]+|inProgress)" # Credits
|
| 408 |
)
|
| 409 |
|
| 410 |
+
course_matches = re.finditer(course_pattern, course_text)
|
| 411 |
+
|
| 412 |
+
for match in course_matches:
|
| 413 |
self.course_history.append({
|
| 414 |
+
"requirement_category": match.group(1).strip(),
|
| 415 |
+
"school_year": match.group(2),
|
| 416 |
+
"grade_level": match.group(3),
|
| 417 |
+
"course_code": match.group(4),
|
| 418 |
+
"description": match.group(5).strip(),
|
| 419 |
+
"term": match.group(6),
|
| 420 |
+
"district_number": match.group(7),
|
| 421 |
+
"grade": match.group(8),
|
| 422 |
+
"inclusion_status": match.group(9),
|
| 423 |
+
"credits": match.group(10)
|
| 424 |
})
|
| 425 |
|
| 426 |
def _extract_current_courses(self, text: str):
|
| 427 |
"""Identify courses currently in progress"""
|
|
|
|
| 428 |
self.current_courses = [
|
| 429 |
{
|
| 430 |
"course": c["description"],
|
| 431 |
+
"code": c["course_code"],
|
| 432 |
"category": c["requirement_category"],
|
| 433 |
"term": c["term"],
|
| 434 |
+
"credits": c["credits"],
|
| 435 |
+
"grade_level": c["grade_level"]
|
| 436 |
}
|
| 437 |
+
for c in self.course_history
|
| 438 |
+
if c["credits"].lower() == "inprogress"
|
| 439 |
]
|
| 440 |
|
| 441 |
+
def _calculate_completion(self):
|
| 442 |
+
"""Calculate overall completion status with more detailed info"""
|
| 443 |
total_required = sum(req["required"] for req in self.requirements.values())
|
| 444 |
total_completed = sum(req["completed"] for req in self.requirements.values())
|
| 445 |
|
| 446 |
+
self.graduation_status.update({
|
| 447 |
+
"total_required_credits": total_required,
|
| 448 |
+
"total_completed_credits": total_completed,
|
| 449 |
"percent_complete": round((total_completed / total_required) * 100, 1),
|
| 450 |
+
"remaining_credits": total_required - total_completed,
|
| 451 |
+
"on_track": (total_completed / total_required) >= 0.75 # 75% completion considered on track
|
| 452 |
+
})
|
| 453 |
|
| 454 |
def to_json(self) -> str:
|
| 455 |
"""Export parsed data as JSON"""
|
|
|
|
| 458 |
"requirements": self.requirements,
|
| 459 |
"current_courses": self.current_courses,
|
| 460 |
"course_history": self.course_history,
|
| 461 |
+
"graduation_status": self.graduation_status
|
| 462 |
}, indent=2)
|
| 463 |
|
| 464 |
+
def format_transcript_output(data: Dict) -> str:
    """Render parsed transcript data as a Markdown-style report.

    Every field is read defensively: a missing section or a partially filled
    record is rendered with a placeholder instead of raising ``KeyError``.

    Args:
        data: Parsed transcript. Recognised keys are ``student_info``,
            ``graduation_status``, ``requirements`` (dict keyed by code),
            ``current_courses`` and ``course_history`` (lists of dicts).

    Returns:
        The report as a single newline-joined string.
    """
    # NOTE(review): only the structured-parser shape is rendered; data shaped
    # differently (e.g. from the AI fallback) shows placeholders - confirm
    # whether that path needs its own formatting.
    output = []

    # Student Info Section
    student = data.get("student_info") or {}
    output.append(f"## Student Transcript Summary\n{'='*50}")
    output.append(f"**Name:** {student.get('name', 'Unknown')}")
    output.append(f"**Student ID:** {student.get('id', 'Unknown')}")
    output.append(f"**Current Grade:** {student.get('current_grade', 'Unknown')}")
    output.append(f"**Graduation Year:** {student.get('graduation_year', 'Unknown')}")
    output.append(f"**Unweighted GPA:** {student.get('unweighted_gpa', 'N/A')}")
    output.append(f"**Weighted GPA:** {student.get('weighted_gpa', 'N/A')}")
    output.append(f"**Total Credits Earned:** {student.get('total_credits', 'N/A')}")
    output.append(f"**Community Service Hours:** {student.get('community_service_hours', 'N/A')}\n")

    # Graduation Requirements Section
    grad_status = data.get("graduation_status") or {}
    output.append(f"## Graduation Progress\n{'='*50}")
    output.append(f"**Overall Completion:** {grad_status.get('percent_complete', 0)}%")
    output.append(f"**Credits Required:** {grad_status.get('total_required_credits', 0)}")
    output.append(f"**Credits Completed:** {grad_status.get('total_completed_credits', 0)}")
    output.append(f"**Credits Remaining:** {grad_status.get('remaining_credits', 0)}")
    output.append(f"**On Track to Graduate:** {'Yes' if grad_status.get('on_track', False) else 'No'}\n")

    # Detailed Requirements
    output.append("### Detailed Requirements:")
    for code, req in (data.get("requirements") or {}).items():
        output.append(
            f"- **{code}**: {req.get('description', '')}\n"
            f" Required: {req.get('required', 'N/A')} | Completed: {req.get('completed', 'N/A')} | "
            f"Status: {req.get('status', 'N/A')}"
        )
    output.append("")

    # Current Courses
    current_courses = data.get("current_courses")
    if current_courses:
        output.append("## Current Courses (In Progress)\n" + '='*50)
        for course in current_courses:
            output.append(
                f"- **{course.get('code', '')} {course.get('course', 'Unnamed course')}**\n"
                f" Category: {course.get('category', 'N/A')} | "
                f"Grade Level: {course.get('grade_level', 'N/A')} | "
                f"Term: {course.get('term', 'N/A')} | Credits: {course.get('credits', 'N/A')}"
            )
        output.append("")

    # Course History by Year
    courses_by_year = defaultdict(list)
    for course in data.get("course_history") or []:
        courses_by_year[course.get("school_year") or "Unknown"].append(course)

    if courses_by_year:
        output.append("## Course History\n" + '='*50)
        # key=str keeps the sort well-defined even if a year is not a string.
        for year in sorted(courses_by_year, key=str):
            output.append(f"\n### {year}")
            for course in courses_by_year[year]:
                output.append(
                    f"- **{course.get('course_code', '')} {course.get('description', 'Unnamed course')}**\n"
                    f" Grade: {course.get('grade', 'N/A')} | Credits: {course.get('credits', 'N/A')} | "
                    f"Category: {course.get('requirement_category', 'N/A')} | Term: {course.get('term', 'N/A')}"
                )

    return '\n'.join(output)
|
| 528 |
+
|
| 529 |
+
def parse_transcript_with_ai(text: str, progress=gr.Progress()) -> Dict:
    """Parse a transcript with the structured parser, falling back to AI.

    Despite the name, the rule-based ``TranscriptParser`` is tried first; the
    AI path (``parse_transcript_with_ai_fallback``) is used only when the
    structured parse raises.

    Args:
        text: Raw transcript text.
        progress: Progress callback, invoked as ``progress(fraction, desc=...)``
            when truthy.

    Returns:
        The structured parser's result dict, or whatever the AI fallback
        returns when structured parsing fails.
    """
    try:
        # First try structured parsing
        if progress:
            progress(0.1, desc="Attempting structured parsing...")

        parser = TranscriptParser()
        parsed_data = parser.parse_transcript(text)

        if progress:
            progress(0.8, desc="Formatting results...")

        return parsed_data

    except Exception as e:
        # Broad catch is deliberate: any structured-parse failure triggers the
        # fallback. exc_info keeps the traceback in the log for diagnosis.
        logging.warning(f"Structured parsing failed, falling back to AI: {str(e)}", exc_info=True)

        # Fall back to AI parsing if structured parsing fails
        return parse_transcript_with_ai_fallback(text, progress)
|
| 549 |
|
|
|
|
| 557 |
- Current grade level
|
| 558 |
- Weighted GPA (if available)
|
| 559 |
- Unweighted GPA (if available)
|
| 560 |
+
- Total credits earned
|
| 561 |
+
- Community service hours (if available)
|
| 562 |
- List of all courses with:
|
| 563 |
* Course code
|
| 564 |
* Course name
|
|
|
|
| 566 |
* Credits earned
|
| 567 |
* Year/semester taken
|
| 568 |
* Grade level when taken
|
| 569 |
+
- Graduation requirements status
|
| 570 |
Return the data in JSON format.
|
| 571 |
|
| 572 |
Transcript Text:
|
|
|
|
| 577 |
if progress:
|
| 578 |
progress(0.1, desc="Processing transcript with AI...")
|
| 579 |
|
| 580 |
+
model, tokenizer = model_loader.load_model(progress)
|
| 581 |
+
if model is None or tokenizer is None:
|
| 582 |
+
raise gr.Error(f"Model failed to load. {model_loader.error or 'Please try loading a model first.'}")
|
| 583 |
+
|
| 584 |
# Tokenize and generate response
|
| 585 |
inputs = tokenizer(prompt, return_tensors="pt").to(model_loader.device)
|
| 586 |
if progress:
|
|
|
|
| 588 |
|
| 589 |
outputs = model.generate(
|
| 590 |
**inputs,
|
| 591 |
+
max_new_tokens=2000,
|
| 592 |
temperature=0.1,
|
| 593 |
do_sample=True
|
| 594 |
)
|
|
|
|
| 617 |
logging.error(f"AI parsing error: {str(e)}")
|
| 618 |
raise gr.Error(f"Error processing transcript: {str(e)}")
|
| 619 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 620 |
def parse_transcript(file_obj, progress=gr.Progress()) -> Tuple[str, Optional[Dict]]:
|
| 621 |
+
"""Main function to parse transcript files with better error handling"""
|
| 622 |
try:
|
| 623 |
if not file_obj:
|
| 624 |
raise ValueError("Please upload a file first")
|
|
|
|
| 626 |
validate_file(file_obj)
|
| 627 |
file_ext = os.path.splitext(file_obj.name)[1].lower()
|
| 628 |
|
| 629 |
+
# Extract text from file with better error reporting
|
| 630 |
+
if progress:
|
| 631 |
+
progress(0.2, desc="Extracting text from file...")
|
| 632 |
+
|
| 633 |
text = extract_text_from_file(file_obj.name, file_ext)
|
| 634 |
|
| 635 |
+
if not text.strip():
|
| 636 |
+
raise ValueError("No text could be extracted from the file")
|
| 637 |
+
|
| 638 |
+
# Use AI for parsing with progress updates
|
| 639 |
+
if progress:
|
| 640 |
+
progress(0.4, desc="Analyzing transcript content...")
|
| 641 |
+
|
| 642 |
parsed_data = parse_transcript_with_ai(text, progress)
|
| 643 |
|
| 644 |
# Format output text
|
| 645 |
+
if progress:
|
| 646 |
+
progress(0.9, desc="Generating report...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 647 |
|
| 648 |
+
output_text = format_transcript_output(parsed_data)
|
|
|
|
|
|
|
|
|
|
| 649 |
|
| 650 |
+
return output_text, parsed_data
|
| 651 |
|
| 652 |
except Exception as e:
|
| 653 |
+
error_msg = f"Error processing transcript: {str(e)}"
|
| 654 |
+
logging.error(error_msg)
|
| 655 |
+
|
| 656 |
+
# Provide helpful tips based on error type
|
| 657 |
+
if "No text could be extracted" in str(e):
|
| 658 |
+
error_msg += "\n\nTips: Please ensure your file is clear and readable. Try scanning at a higher resolution if it's an image."
|
| 659 |
+
elif "requirements table header" in str(e):
|
| 660 |
+
error_msg += "\n\nTips: This appears to be an unsupported transcript format. Please contact support."
|
| 661 |
+
|
| 662 |
+
return error_msg, None
|
| 663 |
|
| 664 |
# ========== LEARNING STYLE QUIZ ==========
|
| 665 |
class LearningStyleQuiz:
|
|
|
|
| 1539 |
"Your profile summary will appear here after saving.",
|
| 1540 |
label="Profile Summary"
|
| 1541 |
)
|
| 1542 |
+
blog = gr.Textbox(label="Personal Blog", visible=False)
|
| 1543 |
|
| 1544 |
def save_profile_and_update(name, age, interests, transcript_data, learning_style,
|
| 1545 |
movie, movie_reason, show, show_reason,
|
|
|
|
| 1696 |
|
| 1697 |
if __name__ == "__main__":
|
| 1698 |
app.launch()
|
| 1699 |
+
|