Spaces:

Dannyar608
/

Final_project

Runtime error

App Files Files Community

Dannyar608 committed on May 20, 2025

Commit

acac7a6

verified ·

1 Parent(s): ebc14af

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -43

app.py CHANGED Viewed

@@ -338,6 +338,23 @@ class GraduationProgress(BaseModel):
     courses: List[Course]
     assessments: Dict[str, str]
 class TranscriptParser:
     def __init__(self):
         self.student_data = {}
@@ -351,20 +368,25 @@ class TranscriptParser:
         try:
             text = preprocess_text(text)
-            # First try the new detailed parser
-            parsed_data = self._parse_detailed_transcript(text)
             if parsed_data:
                 return parsed_data
             # Fall back to simplified parser if detailed parsing fails
-            return self._parse_simplified_transcript(text)
         except Exception as e:
             logging.error(f"Error parsing transcript: {str(e)}")
             raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}")
-    def _parse_detailed_transcript(self, text: str) -> Optional[Dict]:
-        """Parse detailed transcript format with improved patterns for Miami-Dade format"""
         try:
             parsed_data = {
                 'student_info': {},
@@ -385,7 +407,7 @@ class TranscriptParser:
                 parsed_data['student_info']['grade'] = student_info_match.group(3)
                 parsed_data['student_info']['year_of_graduation'] = student_info_match.group(4)
-            # More robust GPA extraction
             gpa_matches = re.findall(
                 r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)",
                 text,
@@ -415,46 +437,56 @@ class TranscriptParser:
             if virtual_grade_match:
                 parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
-            # Extract requirements
-            req_section = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
             if req_section:
                 req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
                 for line in req_lines:
                     if '|' in line:  # Table format
-                        parts = [part.strip() for part in line.split('|')]
-                        if len(parts) >= 6:
-                            code = parts[0]
-                            description = parts[1]
-                            required = float(parts[2]) if parts[2] and parts[2].replace('.','').isdigit() else 0.0
-                            waived = float(parts[3]) if parts[3] and parts[3].replace('.','').isdigit() else 0.0
-                            completed = float(parts[4]) if parts[4] and parts[4].replace('.','').isdigit() else 0.0
-                            status = parts[5]
-                            # Extract percentage if available
-                            percent = 0.0
-                            percent_match = re.search(r"(\d+)%", status)
-                            if percent_match:
-                                percent = float(percent_match.group(1))
-                            parsed_data['requirements'][code] = {
-                                "description": description,
-                                "required": required,
-                                "waived": waived,
-                                "completed": completed,
-                                "percent_complete": percent,
-                                "status": status
-                            }
-            # Extract assessments
-            assess_section = re.search(r"Z-Assessment.*?\n(.*?)(?:\n\s*\n|$)", text, re.DOTALL)
             if assess_section:
                 assess_lines = [line.strip() for line in assess_section.group(1).split('\n') if line.strip()]
                 for line in assess_lines:
                     if '|' in line:
-                        parts = [part.strip() for part in line.split('|')]
                         if len(parts) >= 5 and parts[0].startswith('Z-'):
                             name = parts[0].replace('Z-', '').strip()
-                            status = parts[4]
                             parsed_data['assessments'][name] = status
             # Extract course history with more fault-tolerant parsing
@@ -471,10 +503,10 @@ class TranscriptParser:
                 ]
                 for line in course_lines:
-                    parts = [part.strip() for part in line.split('|')]
-                    # Handle varying number of columns
-                    if len(parts) >= 9:
                         course = {
                             'requirement': parts[0] if len(parts) > 0 else "",
                             'school_year': parts[1] if len(parts) > 1 else "",
@@ -489,17 +521,20 @@ class TranscriptParser:
                         }
                         # Handle "inProgress" and empty credits
-                        if "inProgress" in course['credits'].lower() or not course['credits']:
                             course['credits'] = "0"
                         elif not course['credits'].replace('.','').isdigit():
                             course['credits'] = "0"
                         parsed_data['course_history'].append(course)
             return parsed_data
         except Exception as e:
-            logging.warning(f"Detailed transcript parsing failed: {str(e)}")
             return None
     def _parse_simplified_transcript(self, text: str) -> Dict:
@@ -544,7 +579,7 @@ class TranscriptParser:
                 logging.warning(f"Pattern {pattern} failed: {str(e)}")
                 continue
-        raise ValueError("Could not identify course information in transcript")
 # ========== ENHANCED ANALYSIS FUNCTIONS ==========
 def analyze_gpa(parsed_data: Dict) -> str:
@@ -955,11 +990,11 @@ class LearningStyleQuiz:
             result += "You may benefit from combining different learning approaches:\n"
             for style in primary_styles:
                 result += f"\n**{style}** techniques:\n"
-                for tip in self.learning_styles[style]['tips'][:2]:
                     result += f"- {tip}\n"
                 result += f"\n**{style}** career suggestions:\n"
-                for career in self.learning_styles[style]['careers'][:3]:
                     result += f"- {career}\n"
         return result

     courses: List[Course]
     assessments: Dict[str, str]
def validate_parsed_data(parsed_data: Dict) -> bool:
    """Check that a parsed transcript contains every critical field.

    Each required field is described by a path of steps walked from the
    top of ``parsed_data``. A step is looked up as a key when the current
    container is a dict, and as a position when the current container is
    a list or tuple and the step is an integer.

    Args:
        parsed_data: Nested structure produced by a transcript parser.

    Returns:
        ``True`` when every required path can be resolved.

    Raises:
        ValueError: If any step of a required path is absent. The message
            names the full dotted path, e.g. ``course_history.0``.
    """
    required_fields = [
        ('student_info', 'name'),
        ('student_info', 'weighted_gpa'),
        ('requirements', 'A-English'),  # Sample requirement
        ('course_history', 0),  # At least one course
    ]

    for path in required_fields:
        current = parsed_data
        for key in path:
            if isinstance(current, dict):
                present = key in current
            elif isinstance(current, (list, tuple)) and isinstance(key, int):
                # ``key in sequence`` would test membership by VALUE, so a
                # list of course dicts would never "contain" 0. Check that
                # the index is addressable instead.
                present = -len(current) <= key < len(current)
            else:
                # Scalars / None cannot hold the next step of the path.
                present = False

            if not present:
                # Path steps may be ints; str.join only accepts strings.
                dotted = '.'.join(str(step) for step in path)
                raise ValueError(f"Missing critical field: {dotted}")
            current = current[key]
    return True
 class TranscriptParser:
     def __init__(self):
         self.student_data = {}
         try:
             text = preprocess_text(text)
+            # First try the specialized Miami-Dade parser
+            parsed_data = self._parse_miami_dade_transcript(text)
             if parsed_data:
+                validate_parsed_data(parsed_data)
                 return parsed_data
             # Fall back to simplified parser if detailed parsing fails
+            parsed_data = self._parse_simplified_transcript(text)
+            if parsed_data:
+                return parsed_data
+            raise ValueError("No data could be parsed from the transcript")
         except Exception as e:
             logging.error(f"Error parsing transcript: {str(e)}")
             raise ValueError(f"Couldn't parse transcript content. Error: {str(e)}")
+    def _parse_miami_dade_transcript(self, text: str) -> Optional[Dict]:
+        """Specialized parser for Miami-Dade County Public Schools transcripts"""
         try:
             parsed_data = {
                 'student_info': {},
                 parsed_data['student_info']['grade'] = student_info_match.group(3)
                 parsed_data['student_info']['year_of_graduation'] = student_info_match.group(4)
+            # Extract GPA information
             gpa_matches = re.findall(
                 r"(?:Un.?weighted|Weighted)\s*GPA\s*([\d.]+)",
                 text,
             if virtual_grade_match:
                 parsed_data['student_info']['virtual_grade'] = virtual_grade_match.group(1)
+            # Extract requirements section - more robust table parsing
+            req_section = re.search(
+                r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)(?:\n\s*\n|$)",
+                text,
+                re.DOTALL | re.IGNORECASE
+            )
             if req_section:
                 req_lines = [line.strip() for line in req_section.group(1).split('\n') if line.strip()]
                 for line in req_lines:
                     if '|' in line:  # Table format
+                        parts = [part.strip() for part in line.split('|') if part.strip()]
+                        if len(parts) >= 5:  # More lenient check for number of columns
+                            try:
+                                code = parts[0] if len(parts) > 0 else ""
+                                description = parts[1] if len(parts) > 1 else ""
+                                required = float(parts[2]) if len(parts) > 2 and parts[2].replace('.','').isdigit() else 0.0
+                                waived = float(parts[3]) if len(parts) > 3 and parts[3].replace('.','').isdigit() else 0.0
+                                completed = float(parts[4]) if len(parts) > 4 and parts[4].replace('.','').isdigit() else 0.0
+                                status = parts[5] if len(parts) > 5 else ""
+                                # Extract percentage if available
+                                percent = 0.0
+                                if status:
+                                    percent_match = re.search(r"(\d+)%", status)
+                                    if percent_match:
+                                        percent = float(percent_match.group(1))
+                                parsed_data['requirements'][code] = {
+                                    "description": description,
+                                    "required": required,
+                                    "waived": waived,
+                                    "completed": completed,
+                                    "percent_complete": percent,
+                                    "status": status
+                                }
+                            except (IndexError, ValueError) as e:
+                                logging.warning(f"Skipping malformed requirement line: {line}. Error: {str(e)}")
+                                continue
+            # Extract assessments section
+            assess_section = re.search(r"Z-Assessment.*?\n(.*?)(?:\n\s*\n|$)", text, re.DOTALL | re.IGNORECASE)
             if assess_section:
                 assess_lines = [line.strip() for line in assess_section.group(1).split('\n') if line.strip()]
                 for line in assess_lines:
                     if '|' in line:
+                        parts = [part.strip() for part in line.split('|') if part.strip()]
                         if len(parts) >= 5 and parts[0].startswith('Z-'):
                             name = parts[0].replace('Z-', '').strip()
+                            status = parts[4] if len(parts) > 4 else ""
                             parsed_data['assessments'][name] = status
             # Extract course history with more fault-tolerant parsing
                 ]
                 for line in course_lines:
+                    parts = [part.strip() for part in line.split('|') if part.strip()]
+                    # More robust handling of course data
+                    try:
                         course = {
                             'requirement': parts[0] if len(parts) > 0 else "",
                             'school_year': parts[1] if len(parts) > 1 else "",
                         }
                         # Handle "inProgress" and empty credits
+                        if "inprogress" in course['credits'].lower() or not course['credits']:
                             course['credits'] = "0"
                         elif not course['credits'].replace('.','').isdigit():
                             course['credits'] = "0"
                         parsed_data['course_history'].append(course)
+                    except (IndexError, ValueError) as e:
+                        logging.warning(f"Skipping malformed course line: {line}. Error: {str(e)}")
+                        continue
             return parsed_data
         except Exception as e:
+            logging.warning(f"Miami-Dade transcript parsing failed: {str(e)}")
             return None
     def _parse_simplified_transcript(self, text: str) -> Dict:
                 logging.warning(f"Pattern {pattern} failed: {str(e)}")
                 continue
+        return None
 # ========== ENHANCED ANALYSIS FUNCTIONS ==========
 def analyze_gpa(parsed_data: Dict) -> str:
             result += "You may benefit from combining different learning approaches:\n"
             for style in primary_styles:
                 result += f"\n**{style}** techniques:\n"
+                for tip in style_info['tips'][:2]:
                     result += f"- {tip}\n"
                 result += f"\n**{style}** career suggestions:\n"
+                for career in style_info['careers'][:3]:
                     result += f"- {career}\n"
         return result