Dannyar608 committed on
Commit
41f6b04
·
verified ·
1 Parent(s): 48e62d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -192
app.py CHANGED
@@ -159,11 +159,11 @@ def validate_name(name: str) -> str:
159
  """Validate name input."""
160
  name = name.strip()
161
  if not name:
162
- raise gr.Error("Name cannot be empty")
163
  if len(name) > 100:
164
- raise gr.Error("Name is too long (max 100 characters)")
165
  if any(c.isdigit() for c in name):
166
- raise gr.Error("Name cannot contain numbers")
167
  return name
168
 
169
  def validate_age(age: Union[int, float, str]) -> int:
@@ -171,23 +171,23 @@ def validate_age(age: Union[int, float, str]) -> int:
171
  try:
172
  age_int = int(age)
173
  if not MIN_AGE <= age_int <= MAX_AGE:
174
- raise gr.Error(f"Age must be between {MIN_AGE} and {MAX_AGE}")
175
  return age_int
176
  except (ValueError, TypeError):
177
- raise gr.Error("Please enter a valid age number")
178
 
179
  def validate_file(file_obj) -> None:
180
  """Validate uploaded file."""
181
  if not file_obj:
182
- raise ValueError("No file uploaded")
183
 
184
  file_ext = os.path.splitext(file_obj.name)[1].lower()
185
  if file_ext not in ALLOWED_FILE_TYPES:
186
- raise gr.Error(f"Invalid file type. Allowed: {', '.join(ALLOWED_FILE_TYPES)}")
187
 
188
  file_size = os.path.getsize(file_obj.name) / (1024 * 1024) # MB
189
  if file_size > MAX_FILE_SIZE_MB:
190
- raise gr.Error(f"File too large. Max size: {MAX_FILE_SIZE_MB}MB")
191
 
192
  # ========== TEXT EXTRACTION FUNCTIONS ==========
193
  def extract_text_from_file(file_path: str, file_ext: str) -> str:
@@ -214,7 +214,7 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
214
  text = clean_extracted_text(text)
215
 
216
  if not text.strip():
217
- raise ValueError("No text could be extracted from the file. Please ensure the file is clear and readable.")
218
 
219
  return text
220
 
@@ -331,21 +331,89 @@ class TranscriptParser:
331
  return 'standard'
332
 
333
  def parse_miami_dade(self, text: str) -> Dict:
334
- """Parse Miami-Dade formatted transcripts"""
335
- self._extract_student_info(text)
336
- self._extract_requirements(text)
337
- self._extract_course_history(text)
338
- self._extract_current_courses(text)
339
- self._calculate_completion()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
- return {
342
- "student_info": self.student_data,
343
- "requirements": self.requirements,
344
- "current_courses": self.current_courses,
345
- "course_history": self.course_history,
346
- "graduation_status": self.graduation_status,
347
- "format": "miami_dade"
348
- }
349
 
350
  def parse_standard(self, text: str) -> Dict:
351
  """Parse standard formatted transcripts"""
@@ -419,148 +487,7 @@ class TranscriptParser:
419
  "format": "homeschool"
420
  }
421
 
422
- def _extract_student_info(self, text: str):
423
- """Enhanced student info extraction for Miami-Dade format"""
424
- student_pattern = r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+)\s*Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
425
- student_match = re.search(student_pattern, text, re.IGNORECASE)
426
-
427
- if student_match:
428
- self.student_data = {
429
- "id": student_match.group(1),
430
- "name": student_match.group(2).replace(",", ", "),
431
- "current_grade": student_match.group(3),
432
- "graduation_year": student_match.group(4)
433
- }
434
- else:
435
- # Fallback pattern for alternative formats
436
- fallback_pattern = r"Student:\s*([^\n]+)\s*ID:\s*(\d+)\s*Grade:\s*(\d+)"
437
- fallback_match = re.search(fallback_pattern, text, re.IGNORECASE)
438
- if fallback_match:
439
- self.student_data = {
440
- "name": fallback_match.group(1).strip(),
441
- "id": fallback_match.group(2),
442
- "current_grade": fallback_match.group(3),
443
- "graduation_year": "Unknown"
444
- }
445
-
446
- # Extract GPA info
447
- gpa_pattern = r"Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
448
- gpa_match = re.search(gpa_pattern, text, re.IGNORECASE)
449
-
450
- if gpa_match:
451
- self.student_data.update({
452
- "unweighted_gpa": float(gpa_match.group(1)),
453
- "weighted_gpa": float(gpa_match.group(2))
454
- })
455
- else:
456
- # Try alternative GPA patterns
457
- alt_gpa_pattern = r"GPA\s*([\d.]+)\s*/\s*([\d.]+)"
458
- alt_match = re.search(alt_gpa_pattern, text)
459
- if alt_match:
460
- self.student_data.update({
461
- "unweighted_gpa": float(alt_match.group(1)),
462
- "weighted_gpa": float(alt_match.group(2))
463
- })
464
-
465
- # Extract credits and service hours
466
- credits_pattern = r"Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)"
467
- credits_match = re.search(credits_pattern, text, re.IGNORECASE)
468
-
469
- if credits_match:
470
- self.student_data.update({
471
- "total_credits": float(credits_match.group(1)),
472
- "community_service_hours": int(credits_match.group(2))
473
- })
474
-
475
- def _extract_requirements(self, text: str):
476
- """Parse the graduation requirements section with improved table parsing"""
477
- # Find the requirements table
478
- req_table_start = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status", text)
479
- if not req_table_start:
480
- # Try alternative table headers
481
- req_table_start = re.search(r"Requirement\s*Req\s*Comp\s*Status", text)
482
- if not req_table_start:
483
- raise ValueError("Could not find requirements table header")
484
-
485
- req_text = text[req_table_start.start():]
486
-
487
- # Extract individual requirements
488
- req_pattern = (
489
- r"([A-Z]-[\w\s\(\)&]+)\s*" # Code
490
- r"([^\|]+)\s*" # Description
491
- r"([\d.]+)\s*" # Required
492
- r"([\d.]+)\s*" # Waived
493
- r"([\d.]+)\s*" # Completed
494
- r"([\d.]+)\s*%" # Status
495
- )
496
-
497
- req_matches = re.finditer(req_pattern, req_text)
498
-
499
- for match in req_matches:
500
- req_code = match.group(1).strip()
501
- self.requirements[req_code] = {
502
- "description": match.group(2).strip(),
503
- "required": float(match.group(3)),
504
- "waived": float(match.group(4)),
505
- "completed": float(match.group(5)),
506
- "status": f"{match.group(6)}%"
507
- }
508
-
509
- # Extract total requirements
510
- total_pattern = r"Total\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)%"
511
- total_match = re.search(total_pattern, req_text)
512
-
513
- if total_match:
514
- self.graduation_status["total_requirements"] = {
515
- "required": float(total_match.group(1)),
516
- "waived": float(total_match.group(2)),
517
- "completed": float(total_match.group(3)),
518
- "percent_complete": float(total_match.group(4))
519
- }
520
-
521
- def _extract_course_history(self, text: str):
522
- """Parse the detailed course history with improved pattern matching"""
523
- # Find the course history table
524
- course_header = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description\s*Term\s*DstNumber\s*FG\s*Incl\s*Credits", text)
525
- if not course_header:
526
- # Try alternative course history headers
527
- course_header = re.search(r"Course\s*Grade\s*Credit\s*Year", text)
528
- if not course_header:
529
- raise ValueError("Could not find course history table header")
530
-
531
- course_text = text[course_header.start():]
532
-
533
- # Extract individual courses
534
- course_pattern = (
535
- r"([A-Z]-[\w\s\(\)&-]+)\s*" # Requirement
536
- r"(\d{4}-\d{4})\s*" # School Year
537
- r"(\d{2})\s*" # Grade Level
538
- r"([A-Z0-9]+)\s*" # Course Number
539
- r"([^\|]+)\s*" # Description
540
- r"([A-Z0-9]+)\s*" # Term
541
- r"([A-Z0-9]+)\s*" # District Number
542
- r"([A-Z])\s*" # Final Grade
543
- r"([A-Z])\s*" # Inclusion Status
544
- r"([\d.]+|inProgress)" # Credits
545
- )
546
-
547
- course_matches = re.finditer(course_pattern, course_text)
548
-
549
- for match in course_matches:
550
- self.course_history.append({
551
- "requirement_category": match.group(1).strip(),
552
- "school_year": match.group(2),
553
- "grade_level": match.group(3),
554
- "course_code": match.group(4),
555
- "description": match.group(5).strip(),
556
- "term": match.group(6),
557
- "district_number": match.group(7),
558
- "grade": match.group(8),
559
- "inclusion_status": match.group(9),
560
- "credits": match.group(10)
561
- })
562
-
563
- def _extract_current_courses(self, text: str):
564
  """Identify courses currently in progress"""
565
  self.current_courses = [
566
  {
@@ -572,7 +499,7 @@ class TranscriptParser:
572
  "grade_level": c["grade_level"]
573
  }
574
  for c in self.course_history
575
- if c["credits"].lower() == "inprogress"
576
  ]
577
 
578
  def _calculate_completion(self):
@@ -1023,12 +950,32 @@ class ProfileManager:
1023
  movie: str, movie_reason: str, show: str, show_reason: str,
1024
  book: str, book_reason: str, character: str, character_reason: str,
1025
  blog: str) -> str:
1026
- """Save student profile with validation."""
1027
  try:
1028
- # Validate required fields
1029
- name = validate_name(name)
1030
- age = validate_age(age)
1031
- interests = sanitize_input(interests)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1032
 
1033
  # Prepare favorites data
1034
  favorites = {
@@ -1045,8 +992,8 @@ class ProfileManager:
1045
  # Prepare full profile data
1046
  data = {
1047
  "name": name,
1048
- "age": age,
1049
- "interests": interests,
1050
  "transcript": transcript if transcript else {},
1051
  "learning_style": learning_style if learning_style else "Not assessed",
1052
  "favorites": favorites,
@@ -1076,8 +1023,8 @@ class ProfileManager:
1076
  return self._generate_profile_summary(data)
1077
 
1078
  except Exception as e:
1079
- logging.error(f"Error saving profile: {str(e)}")
1080
- raise gr.Error(f"Error saving profile: {str(e)}")
1081
 
1082
  def load_profile(self, name: str = None, session_token: str = None) -> Dict:
1083
  """Load profile by name or return the first one found."""
@@ -1544,22 +1491,11 @@ def create_interface():
1544
  def process_transcript(file_obj, current_tab_status):
1545
  try:
1546
  if not file_obj:
1547
- raise ValueError("Please upload a file first")
1548
 
1549
  output_text, data = parse_transcript(file_obj)
1550
- if "Error" not in output_text:
1551
- new_status = current_tab_status.copy()
1552
- new_status[0] = True
1553
- return (
1554
- output_text,
1555
- data,
1556
- new_status,
1557
- gr.update(elem_classes="completed-tab"),
1558
- gr.update(interactive=True),
1559
- gr.update(visible=False),
1560
- gr.update(visible=False)
1561
- )
1562
- else:
1563
  return (
1564
  output_text,
1565
  None,
@@ -1569,10 +1505,23 @@ def create_interface():
1569
  gr.update(visible=True, value=f"<div class='error-message'>{output_text}</div>"),
1570
  gr.update(visible=False)
1571
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
1572
  except Exception as e:
1573
- error_msg = f" Error: {str(e)}"
1574
  if "PDF" in str(e):
1575
- error_msg += "\n\nTIPS FOR PDF FILES:\n1. Try opening and re-saving the PDF\n2. Ensure it's not password protected\n3. Try converting to an image"
1576
  return (
1577
  error_msg,
1578
  None,
@@ -1881,9 +1830,18 @@ def create_interface():
1881
 
1882
  # Check if current tab is completed
1883
  if not tab_completed_status.get(current_tab, False):
 
 
 
 
 
 
1884
  return (
1885
  gr.Tabs(selected=current_tab),
1886
- gr.update(value=f"<div class='error-message'>⚠️ Please complete Step {current_tab+1} first!</div>", visible=True)
 
 
 
1887
  )
1888
 
1889
  return gr.Tabs(selected=tab_index), gr.update(visible=False)
 
159
  """Validate name input."""
160
  name = name.strip()
161
  if not name:
162
+ raise ValueError("Name cannot be empty. Please enter your full name.")
163
  if len(name) > 100:
164
+ raise ValueError("Name is too long (maximum 100 characters).")
165
  if any(c.isdigit() for c in name):
166
+ raise ValueError("Name cannot contain numbers.")
167
  return name
168
 
def validate_age(age: Union[int, float, str]) -> int:
    """Validate an age value and return it as an integer.

    Args:
        age: Raw age input; anything ``int()`` can convert.

    Returns:
        The age converted with ``int()``.

    Raises:
        ValueError: If ``age`` cannot be converted to an integer, or if the
            converted value lies outside ``MIN_AGE``..``MAX_AGE`` (inclusive).
    """
    # Convert first and keep the range check outside this ``try``: the range
    # error is itself a ValueError, so raising it inside the ``try`` would let
    # the ``except`` clause replace it with the generic "valid age number"
    # message and the user would never see the allowed range.
    try:
        age_int = int(age)
    except (ValueError, TypeError, OverflowError) as exc:
        # OverflowError covers int(float("inf")), which is also not a usable age.
        raise ValueError("Please enter a valid age number.") from exc

    if not MIN_AGE <= age_int <= MAX_AGE:
        raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
    return age_int
178
 
def validate_file(file_obj) -> None:
    """Check that an uploaded file exists, has an allowed type and fits the size cap.

    Args:
        file_obj: Upload handle exposing the file's path as ``.name``.

    Raises:
        ValueError: If no file was supplied, if the extension is not listed in
            ``ALLOWED_FILE_TYPES``, or if the file is larger than
            ``MAX_FILE_SIZE_MB`` megabytes.
    """
    if not file_obj:
        raise ValueError("Please upload a file first.")

    # The extension comparison is case-insensitive.
    _, extension = os.path.splitext(file_obj.name)
    if extension.lower() not in ALLOWED_FILE_TYPES:
        raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")

    # Size on disk converted from bytes to megabytes.
    size_mb = os.path.getsize(file_obj.name) / (1024 * 1024)
    if size_mb > MAX_FILE_SIZE_MB:
        raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
191
 
192
  # ========== TEXT EXTRACTION FUNCTIONS ==========
193
  def extract_text_from_file(file_path: str, file_ext: str) -> str:
 
214
  text = clean_extracted_text(text)
215
 
216
  if not text.strip():
217
+ raise ValueError("No text could be extracted. Please ensure the file is clear and readable.")
218
 
219
  return text
220
 
 
331
  return 'standard'
332
 
def parse_miami_dade(self, text: str) -> Dict:
    """Parse a Miami-Dade formatted transcript.

    Runs three regex passes over ``text`` (student summary, requirements
    table, course history) and stores whatever matches on the parser
    instance. A section whose pattern does not match is skipped silently.

    Args:
        text: Full text extracted from the transcript.

    Returns:
        Dict with keys ``student_info``, ``requirements``,
        ``current_courses``, ``course_history``, ``graduation_status`` and
        ``format`` (always ``"miami_dade"``).

    Raises:
        ValueError: If anything fails while parsing; the underlying exception
            is attached as ``__cause__``.
    """
    # NOTE(review): requirements and course_history are added to in place;
    # whether they are reset between parses is not visible here — confirm.
    try:
        # Student summary: one combined pattern, so id/name/grade are only
        # captured when the GPA, credit and service-hour fields are present too.
        student_info = re.search(
            r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+).*?Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
            r".*?Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
            r".*?Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)",
            text, re.DOTALL
        )

        if student_info:
            self.student_data = {
                "id": student_info.group(1),
                # Normalise to exactly one space after the comma. The pattern
                # allows "LAST,FIRST" as well as "LAST, FIRST", and a plain
                # replace(",", ", ") would double the space in the latter.
                "name": re.sub(r",\s*", ", ", student_info.group(2)),
                "current_grade": student_info.group(3),
                "graduation_year": student_info.group(4),
                "unweighted_gpa": float(student_info.group(5)),
                "weighted_gpa": float(student_info.group(6)),
                "total_credits": float(student_info.group(7)),
                "community_service_hours": int(student_info.group(8))
            }

        # Requirements table: everything between the column header row and
        # the "Total ..." summary row.
        req_table = re.search(
            r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)Total\s*[\d.]+\s*[\d.]+\s*[\d.]+\s*[\d.]+%",
            text, re.DOTALL
        )

        if req_table:
            req_matches = re.finditer(
                r"([A-Z]-[\w\s\(\)&]+)\s*([^\n]+?)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)%",
                req_table.group(1)
            )

            for match in req_matches:
                req_code = match.group(1).strip()
                self.requirements[req_code] = {
                    "description": match.group(2).strip(),
                    "required": float(match.group(3)),
                    "waived": float(match.group(4)),
                    "completed": float(match.group(5)),
                    "status": f"{match.group(6)}%"
                }

        # Course history: from the column header row up to the
        # "Legend for Incl:" footer, or end of text if there is no footer.
        # NOTE(review): the header spellings ("GradeLv1", "CrsNu m") presumably
        # mirror the extracted text as-is — confirm against real transcripts.
        course_section = re.search(
            r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description(.*?)(?=Legend for Incl:|$)",
            text, re.DOTALL
        )

        if course_section:
            course_matches = re.finditer(
                r"([A-Z]-[\w\s\(\)&-]+)\s*(\d{4}-\d{4}|\d{1,2})\s*(\d{2})\s*([A-Z0-9]+)\s*([^\n]+?)\s*([A-Z0-9]+)\s*([A-Z0-9]+)\s*([A-Z])\s*([A-Z])\s*([\d.]+|inProgress)",
                course_section.group(1)
            )

            for match in course_matches:
                self.course_history.append({
                    "requirement_category": match.group(1).strip(),
                    "school_year": match.group(2),
                    "grade_level": match.group(3),
                    "course_code": match.group(4),
                    "description": match.group(5).strip(),
                    "term": match.group(6),
                    "district_number": match.group(7),
                    "grade": match.group(8),
                    "inclusion_status": match.group(9),
                    # Kept as text: either a number or the literal "inProgress".
                    "credits": match.group(10)
                })

        # Derive the in-progress course list and the completion figures from
        # what was collected above.
        self._extract_current_courses()
        self._calculate_completion()

        return {
            "student_info": self.student_data,
            "requirements": self.requirements,
            "current_courses": self.current_courses,
            "course_history": self.course_history,
            "graduation_status": self.graduation_status,
            "format": "miami_dade"
        }

    except Exception as e:
        logging.error(f"Error parsing Miami-Dade transcript: {str(e)}")
        # Chain explicitly so the root cause stays attached to the error
        # that callers see.
        raise ValueError(f"Couldn't parse transcript. Please ensure it's a valid Miami-Dade transcript. Error: {str(e)}") from e
 
 
 
 
 
417
 
418
  def parse_standard(self, text: str) -> Dict:
419
  """Parse standard formatted transcripts"""
 
487
  "format": "homeschool"
488
  }
489
 
490
+ def _extract_current_courses(self):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
  """Identify courses currently in progress"""
492
  self.current_courses = [
493
  {
 
499
  "grade_level": c["grade_level"]
500
  }
501
  for c in self.course_history
502
+ if isinstance(c["credits"], str) and c["credits"].lower() == "inprogress"
503
  ]
504
 
505
  def _calculate_completion(self):
 
950
  movie: str, movie_reason: str, show: str, show_reason: str,
951
  book: str, book_reason: str, character: str, character_reason: str,
952
  blog: str) -> str:
953
+ """Save student profile with better validation messages"""
954
  try:
955
+ # Validate required fields with specific messages
956
+ if not name.strip():
957
+ raise ValueError("Name cannot be empty. Please enter your full name.")
958
+ if len(name) > 100:
959
+ raise ValueError("Name is too long (maximum 100 characters).")
960
+ if any(c.isdigit() for c in name):
961
+ raise ValueError("Name cannot contain numbers.")
962
+
963
+ try:
964
+ age_int = int(age)
965
+ if not MIN_AGE <= age_int <= MAX_AGE:
966
+ raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
967
+ except (ValueError, TypeError):
968
+ raise ValueError("Please enter a valid age number.")
969
+
970
+ if not interests.strip():
971
+ raise ValueError("Please describe at least one interest or hobby.")
972
+
973
+ if not transcript:
974
+ raise ValueError("Please complete the transcript analysis first.")
975
+
976
+ # Validate learning style quiz completion
977
+ if not learning_style or "Your primary learning style is:" not in learning_style:
978
+ raise ValueError("Please complete the learning style quiz first.")
979
 
980
  # Prepare favorites data
981
  favorites = {
 
992
  # Prepare full profile data
993
  data = {
994
  "name": name,
995
+ "age": age_int,
996
+ "interests": sanitize_input(interests),
997
  "transcript": transcript if transcript else {},
998
  "learning_style": learning_style if learning_style else "Not assessed",
999
  "favorites": favorites,
 
1023
  return self._generate_profile_summary(data)
1024
 
1025
  except Exception as e:
1026
+ logging.error(f"Profile validation error: {str(e)}")
1027
+ raise gr.Error(f"Couldn't save profile: {str(e)}")
1028
 
1029
  def load_profile(self, name: str = None, session_token: str = None) -> Dict:
1030
  """Load profile by name or return the first one found."""
 
1491
  def process_transcript(file_obj, current_tab_status):
1492
  try:
1493
  if not file_obj:
1494
+ raise ValueError("Please upload a transcript file first.")
1495
 
1496
  output_text, data = parse_transcript(file_obj)
1497
+
1498
+ if "Error" in output_text:
 
 
 
 
 
 
 
 
 
 
 
1499
  return (
1500
  output_text,
1501
  None,
 
1505
  gr.update(visible=True, value=f"<div class='error-message'>{output_text}</div>"),
1506
  gr.update(visible=False)
1507
  )
1508
+
1509
+ new_status = current_tab_status.copy()
1510
+ new_status[0] = True
1511
+ return (
1512
+ output_text,
1513
+ data,
1514
+ new_status,
1515
+ gr.update(elem_classes="completed-tab"),
1516
+ gr.update(interactive=True),
1517
+ gr.update(visible=False),
1518
+ gr.update(visible=False)
1519
+ )
1520
+
1521
  except Exception as e:
1522
+ error_msg = f"Error processing transcript: {str(e)}"
1523
  if "PDF" in str(e):
1524
+ error_msg += "\n\nTIPS:\n- Try re-saving the PDF\n- Ensure it's not password protected\n- Try converting to an image"
1525
  return (
1526
  error_msg,
1527
  None,
 
1830
 
1831
  # Check if current tab is completed
1832
  if not tab_completed_status.get(current_tab, False):
1833
+ messages = {
1834
+ 0: "Please complete the transcript analysis first.",
1835
+ 1: "Please complete the learning style quiz first.",
1836
+ 2: "Please fill out your personal information first.",
1837
+ 3: "Please save your profile first."
1838
+ }
1839
  return (
1840
  gr.Tabs(selected=current_tab),
1841
+ gr.update(
1842
+ value=f"<div class='error-message'>⚠️ {messages.get(current_tab, 'Please complete this step first')}</div>",
1843
+ visible=True
1844
+ )
1845
  )
1846
 
1847
  return gr.Tabs(selected=tab_index), gr.update(visible=False)