Spaces:

Dannyar608
/

Final_project

Runtime error

App Files Community

Dannyar608 committed on May 16, 2025

Commit

41f6b04

verified ·

1 Parent(s): 48e62d8

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -192

app.py CHANGED Viewed

@@ -159,11 +159,11 @@ def validate_name(name: str) -> str:
     """Validate name input."""
     name = name.strip()
     if not name:
-        raise gr.Error("Name cannot be empty")
     if len(name) > 100:
-        raise gr.Error("Name is too long (max 100 characters)")
     if any(c.isdigit() for c in name):
-        raise gr.Error("Name cannot contain numbers")
     return name
 def validate_age(age: Union[int, float, str]) -> int:
@@ -171,23 +171,23 @@ def validate_age(age: Union[int, float, str]) -> int:
     try:
         age_int = int(age)
         if not MIN_AGE <= age_int <= MAX_AGE:
-            raise gr.Error(f"Age must be between {MIN_AGE} and {MAX_AGE}")
         return age_int
     except (ValueError, TypeError):
-        raise gr.Error("Please enter a valid age number")
 def validate_file(file_obj) -> None:
     """Validate uploaded file."""
     if not file_obj:
-        raise ValueError("No file uploaded")
     file_ext = os.path.splitext(file_obj.name)[1].lower()
     if file_ext not in ALLOWED_FILE_TYPES:
-        raise gr.Error(f"Invalid file type. Allowed: {', '.join(ALLOWED_FILE_TYPES)}")
     file_size = os.path.getsize(file_obj.name) / (1024 * 1024)  # MB
     if file_size > MAX_FILE_SIZE_MB:
-        raise gr.Error(f"File too large. Max size: {MAX_FILE_SIZE_MB}MB")
 # ========== TEXT EXTRACTION FUNCTIONS ==========
 def extract_text_from_file(file_path: str, file_ext: str) -> str:
@@ -214,7 +214,7 @@ def extract_text_from_file(file_path: str, file_ext: str) -> str:
         text = clean_extracted_text(text)
         if not text.strip():
-            raise ValueError("No text could be extracted from the file. Please ensure the file is clear and readable.")
         return text
@@ -331,21 +331,89 @@ class TranscriptParser:
         return 'standard'
     def parse_miami_dade(self, text: str) -> Dict:
-        """Parse Miami-Dade formatted transcripts"""
-        self._extract_student_info(text)
-        self._extract_requirements(text)
-        self._extract_course_history(text)
-        self._extract_current_courses(text)
-        self._calculate_completion()
-        return {
-            "student_info": self.student_data,
-            "requirements": self.requirements,
-            "current_courses": self.current_courses,
-            "course_history": self.course_history,
-            "graduation_status": self.graduation_status,
-            "format": "miami_dade"
-        }
     def parse_standard(self, text: str) -> Dict:
         """Parse standard formatted transcripts"""
@@ -419,148 +487,7 @@ class TranscriptParser:
             "format": "homeschool"
         }
-    def _extract_student_info(self, text: str):
-        """Enhanced student info extraction for Miami-Dade format"""
-        student_pattern = r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+)\s*Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
-        student_match = re.search(student_pattern, text, re.IGNORECASE)
-        if student_match:
-            self.student_data = {
-                "id": student_match.group(1),
-                "name": student_match.group(2).replace(",", ", "),
-                "current_grade": student_match.group(3),
-                "graduation_year": student_match.group(4)
-            }
-        else:
-            # Fallback pattern for alternative formats
-            fallback_pattern = r"Student:\s*([^\n]+)\s*ID:\s*(\d+)\s*Grade:\s*(\d+)"
-            fallback_match = re.search(fallback_pattern, text, re.IGNORECASE)
-            if fallback_match:
-                self.student_data = {
-                    "name": fallback_match.group(1).strip(),
-                    "id": fallback_match.group(2),
-                    "current_grade": fallback_match.group(3),
-                    "graduation_year": "Unknown"
-                }
-        # Extract GPA info
-        gpa_pattern = r"Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
-        gpa_match = re.search(gpa_pattern, text, re.IGNORECASE)
-        if gpa_match:
-            self.student_data.update({
-                "unweighted_gpa": float(gpa_match.group(1)),
-                "weighted_gpa": float(gpa_match.group(2))
-            })
-        else:
-            # Try alternative GPA patterns
-            alt_gpa_pattern = r"GPA\s*([\d.]+)\s*/\s*([\d.]+)"
-            alt_match = re.search(alt_gpa_pattern, text)
-            if alt_match:
-                self.student_data.update({
-                    "unweighted_gpa": float(alt_match.group(1)),
-                    "weighted_gpa": float(alt_match.group(2))
-                })
-        # Extract credits and service hours
-        credits_pattern = r"Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)"
-        credits_match = re.search(credits_pattern, text, re.IGNORECASE)
-        if credits_match:
-            self.student_data.update({
-                "total_credits": float(credits_match.group(1)),
-                "community_service_hours": int(credits_match.group(2))
-            })
-    def _extract_requirements(self, text: str):
-        """Parse the graduation requirements section with improved table parsing"""
-        # Find the requirements table
-        req_table_start = re.search(r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status", text)
-        if not req_table_start:
-            # Try alternative table headers
-            req_table_start = re.search(r"Requirement\s*Req\s*Comp\s*Status", text)
-            if not req_table_start:
-                raise ValueError("Could not find requirements table header")
-        req_text = text[req_table_start.start():]
-        # Extract individual requirements
-        req_pattern = (
-            r"([A-Z]-[\w\s\(\)&]+)\s*"  # Code
-            r"([^\|]+)\s*"  # Description
-            r"([\d.]+)\s*"  # Required
-            r"([\d.]+)\s*"  # Waived
-            r"([\d.]+)\s*"  # Completed
-            r"([\d.]+)\s*%"  # Status
-        )
-        req_matches = re.finditer(req_pattern, req_text)
-        for match in req_matches:
-            req_code = match.group(1).strip()
-            self.requirements[req_code] = {
-                "description": match.group(2).strip(),
-                "required": float(match.group(3)),
-                "waived": float(match.group(4)),
-                "completed": float(match.group(5)),
-                "status": f"{match.group(6)}%"
-            }
-        # Extract total requirements
-        total_pattern = r"Total\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)%"
-        total_match = re.search(total_pattern, req_text)
-        if total_match:
-            self.graduation_status["total_requirements"] = {
-                "required": float(total_match.group(1)),
-                "waived": float(total_match.group(2)),
-                "completed": float(total_match.group(3)),
-                "percent_complete": float(total_match.group(4))
-            }
-    def _extract_course_history(self, text: str):
-        """Parse the detailed course history with improved pattern matching"""
-        # Find the course history table
-        course_header = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description\s*Term\s*DstNumber\s*FG\s*Incl\s*Credits", text)
-        if not course_header:
-            # Try alternative course history headers
-            course_header = re.search(r"Course\s*Grade\s*Credit\s*Year", text)
-            if not course_header:
-                raise ValueError("Could not find course history table header")
-        course_text = text[course_header.start():]
-        # Extract individual courses
-        course_pattern = (
-            r"([A-Z]-[\w\s\(\)&-]+)\s*"  # Requirement
-            r"(\d{4}-\d{4})\s*"  # School Year
-            r"(\d{2})\s*"  # Grade Level
-            r"([A-Z0-9]+)\s*"  # Course Number
-            r"([^\|]+)\s*"  # Description
-            r"([A-Z0-9]+)\s*"  # Term
-            r"([A-Z0-9]+)\s*"  # District Number
-            r"([A-Z])\s*"  # Final Grade
-            r"([A-Z])\s*"  # Inclusion Status
-            r"([\d.]+|inProgress)"  # Credits
-        )
-        course_matches = re.finditer(course_pattern, course_text)
-        for match in course_matches:
-            self.course_history.append({
-                "requirement_category": match.group(1).strip(),
-                "school_year": match.group(2),
-                "grade_level": match.group(3),
-                "course_code": match.group(4),
-                "description": match.group(5).strip(),
-                "term": match.group(6),
-                "district_number": match.group(7),
-                "grade": match.group(8),
-                "inclusion_status": match.group(9),
-                "credits": match.group(10)
-            })
-    def _extract_current_courses(self, text: str):
         """Identify courses currently in progress"""
         self.current_courses = [
             {
@@ -572,7 +499,7 @@ class TranscriptParser:
                 "grade_level": c["grade_level"]
             }
             for c in self.course_history
-            if c["credits"].lower() == "inprogress"
         ]
     def _calculate_completion(self):
@@ -1023,12 +950,32 @@ class ProfileManager:
                     movie: str, movie_reason: str, show: str, show_reason: str,
                     book: str, book_reason: str, character: str, character_reason: str,
                     blog: str) -> str:
-        """Save student profile with validation."""
         try:
-            # Validate required fields
-            name = validate_name(name)
-            age = validate_age(age)
-            interests = sanitize_input(interests)
             # Prepare favorites data
             favorites = {
@@ -1045,8 +992,8 @@ class ProfileManager:
             # Prepare full profile data
             data = {
                 "name": name,
-                "age": age,
-                "interests": interests,
                 "transcript": transcript if transcript else {},
                 "learning_style": learning_style if learning_style else "Not assessed",
                 "favorites": favorites,
@@ -1076,8 +1023,8 @@ class ProfileManager:
             return self._generate_profile_summary(data)
         except Exception as e:
-            logging.error(f"Error saving profile: {str(e)}")
-            raise gr.Error(f"Error saving profile: {str(e)}")
     def load_profile(self, name: str = None, session_token: str = None) -> Dict:
         """Load profile by name or return the first one found."""
@@ -1544,22 +1491,11 @@ def create_interface():
                 def process_transcript(file_obj, current_tab_status):
                     try:
                         if not file_obj:
-                            raise ValueError("Please upload a file first")
                         output_text, data = parse_transcript(file_obj)
-                        if "Error" not in output_text:
-                            new_status = current_tab_status.copy()
-                            new_status[0] = True
-                            return (
-                                output_text,
-                                data,
-                                new_status,
-                                gr.update(elem_classes="completed-tab"),
-                                gr.update(interactive=True),
-                                gr.update(visible=False),
-                                gr.update(visible=False)
-                            )
-                        else:
                             return (
                                 output_text,
                                 None,
@@ -1569,10 +1505,23 @@ def create_interface():
                                 gr.update(visible=True, value=f"<div class='error-message'>{output_text}</div>"),
                                 gr.update(visible=False)
                             )
                     except Exception as e:
-                        error_msg = f"❌ Error: {str(e)}"
                         if "PDF" in str(e):
-                            error_msg += "\n\nTIPS FOR PDF FILES:\n1. Try opening and re-saving the PDF\n2. Ensure it's not password protected\n3. Try converting to an image"
                         return (
                             error_msg,
                             None,
@@ -1881,9 +1830,18 @@ def create_interface():
             # Check if current tab is completed
             if not tab_completed_status.get(current_tab, False):
                 return (
                     gr.Tabs(selected=current_tab),
-                    gr.update(value=f"<div class='error-message'>⚠️ Please complete Step {current_tab+1} first!</div>", visible=True)
                 )
             return gr.Tabs(selected=tab_index), gr.update(visible=False)

     """Validate name input."""
     name = name.strip()
     if not name:
+        raise ValueError("Name cannot be empty. Please enter your full name.")
     if len(name) > 100:
+        raise ValueError("Name is too long (maximum 100 characters).")
     if any(c.isdigit() for c in name):
+        raise ValueError("Name cannot contain numbers.")
     return name
 def validate_age(age: Union[int, float, str]) -> int:
     try:
         age_int = int(age)
         if not MIN_AGE <= age_int <= MAX_AGE:
+            raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
         return age_int
     except (ValueError, TypeError):
+        raise ValueError("Please enter a valid age number.")
 def validate_file(file_obj) -> None:
     """Validate uploaded file."""
     if not file_obj:
+        raise ValueError("Please upload a file first.")
     file_ext = os.path.splitext(file_obj.name)[1].lower()
     if file_ext not in ALLOWED_FILE_TYPES:
+        raise ValueError(f"Invalid file type. Allowed types: {', '.join(ALLOWED_FILE_TYPES)}")
     file_size = os.path.getsize(file_obj.name) / (1024 * 1024)  # MB
     if file_size > MAX_FILE_SIZE_MB:
+        raise ValueError(f"File too large. Maximum size is {MAX_FILE_SIZE_MB}MB.")
 # ========== TEXT EXTRACTION FUNCTIONS ==========
 def extract_text_from_file(file_path: str, file_ext: str) -> str:
         text = clean_extracted_text(text)
         if not text.strip():
+            raise ValueError("No text could be extracted. Please ensure the file is clear and readable.")
         return text
         return 'standard'
     def parse_miami_dade(self, text: str) -> Dict:
+        """Parse Miami-Dade formatted transcripts with enhanced error handling"""
+        try:
+            # Extract student info with more robust patterns
+            student_info = re.search(
+                r"(\d{7})\s*-\s*([A-Z]+,\s*[A-Z]+).*?Current Grade:\s*(\d+)\s*YOG\s*(\d{4})"
+                r".*?Un-weighted GPA\s*([\d.]+).*?Weighted GPA\s*([\d.]+)"
+                r".*?Total Credits Earned\s*([\d.]+).*?Comm Serv Hours\s*(\d+)",
+                text, re.DOTALL
+            )
+            if student_info:
+                self.student_data = {
+                    "id": student_info.group(1),
+                    "name": student_info.group(2).replace(",", ", "),
+                    "current_grade": student_info.group(3),
+                    "graduation_year": student_info.group(4),
+                    "unweighted_gpa": float(student_info.group(5)),
+                    "weighted_gpa": float(student_info.group(6)),
+                    "total_credits": float(student_info.group(7)),
+                    "community_service_hours": int(student_info.group(8))
+                }
+            # Extract requirements with better table parsing
+            req_table = re.search(
+                r"Code\s*Description\s*Required\s*Waived\s*Completed\s*Status(.*?)Total\s*[\d.]+\s*[\d.]+\s*[\d.]+\s*[\d.]+%",
+                text, re.DOTALL
+            )
+            if req_table:
+                req_matches = re.finditer(
+                    r"([A-Z]-[\w\s\(\)&]+)\s*([^\n]+?)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)\s*([\d.]+)%",
+                    req_table.group(1)
+                )
+                for match in req_matches:
+                    req_code = match.group(1).strip()
+                    self.requirements[req_code] = {
+                        "description": match.group(2).strip(),
+                        "required": float(match.group(3)),
+                        "waived": float(match.group(4)),
+                        "completed": float(match.group(5)),
+                        "status": f"{match.group(6)}%"
+                    }
+            # Extract course history with more flexible parsing
+            course_section = re.search(r"Requirement\s*School Year\s*GradeLv1\s*CrsNu m\s*Description(.*?)(?=Legend for Incl:|$)", text, re.DOTALL)
+            if course_section:
+                course_matches = re.finditer(
+                    r"([A-Z]-[\w\s\(\)&-]+)\s*(\d{4}-\d{4}|\d{1,2})\s*(\d{2})\s*([A-Z0-9]+)\s*([^\n]+?)\s*([A-Z0-9]+)\s*([A-Z0-9]+)\s*([A-Z])\s*([A-Z])\s*([\d.]+|inProgress)",
+                    course_section.group(1)
+                )
+                for match in course_matches:
+                    self.course_history.append({
+                        "requirement_category": match.group(1).strip(),
+                        "school_year": match.group(2),
+                        "grade_level": match.group(3),
+                        "course_code": match.group(4),
+                        "description": match.group(5).strip(),
+                        "term": match.group(6),
+                        "district_number": match.group(7),
+                        "grade": match.group(8),
+                        "inclusion_status": match.group(9),
+                        "credits": match.group(10)
+                    })
+            # Identify current courses
+            self._extract_current_courses()
+            self._calculate_completion()
+            return {
+                "student_info": self.student_data,
+                "requirements": self.requirements,
+                "current_courses": self.current_courses,
+                "course_history": self.course_history,
+                "graduation_status": self.graduation_status,
+                "format": "miami_dade"
+            }
+        except Exception as e:
+            logging.error(f"Error parsing Miami-Dade transcript: {str(e)}")
+            raise ValueError(f"Couldn't parse transcript. Please ensure it's a valid Miami-Dade transcript. Error: {str(e)}")
     def parse_standard(self, text: str) -> Dict:
         """Parse standard formatted transcripts"""
             "format": "homeschool"
         }
+    def _extract_current_courses(self):
         """Identify courses currently in progress"""
         self.current_courses = [
             {
                 "grade_level": c["grade_level"]
             }
             for c in self.course_history
+            if isinstance(c["credits"], str) and c["credits"].lower() == "inprogress"
         ]
     def _calculate_completion(self):
                     movie: str, movie_reason: str, show: str, show_reason: str,
                     book: str, book_reason: str, character: str, character_reason: str,
                     blog: str) -> str:
+        """Save student profile with better validation messages"""
         try:
+            # Validate required fields with specific messages
+            if not name.strip():
+                raise ValueError("Name cannot be empty. Please enter your full name.")
+            if len(name) > 100:
+                raise ValueError("Name is too long (maximum 100 characters).")
+            if any(c.isdigit() for c in name):
+                raise ValueError("Name cannot contain numbers.")
+            try:
+                age_int = int(age)
+                if not MIN_AGE <= age_int <= MAX_AGE:
+                    raise ValueError(f"Age must be between {MIN_AGE} and {MAX_AGE}.")
+            except (ValueError, TypeError):
+                raise ValueError("Please enter a valid age number.")
+            if not interests.strip():
+                raise ValueError("Please describe at least one interest or hobby.")
+            if not transcript:
+                raise ValueError("Please complete the transcript analysis first.")
+            # Validate learning style quiz completion
+            if not learning_style or "Your primary learning style is:" not in learning_style:
+                raise ValueError("Please complete the learning style quiz first.")
             # Prepare favorites data
             favorites = {
             # Prepare full profile data
             data = {
                 "name": name,
+                "age": age_int,
+                "interests": sanitize_input(interests),
                 "transcript": transcript if transcript else {},
                 "learning_style": learning_style if learning_style else "Not assessed",
                 "favorites": favorites,
             return self._generate_profile_summary(data)
         except Exception as e:
+            logging.error(f"Profile validation error: {str(e)}")
+            raise gr.Error(f"Couldn't save profile: {str(e)}")
     def load_profile(self, name: str = None, session_token: str = None) -> Dict:
         """Load profile by name or return the first one found."""
                 def process_transcript(file_obj, current_tab_status):
                     try:
                         if not file_obj:
+                            raise ValueError("Please upload a transcript file first.")
                         output_text, data = parse_transcript(file_obj)
+                        if "Error" in output_text:
                             return (
                                 output_text,
                                 None,
                                 gr.update(visible=True, value=f"<div class='error-message'>{output_text}</div>"),
                                 gr.update(visible=False)
                             )
+                        new_status = current_tab_status.copy()
+                        new_status[0] = True
+                        return (
+                            output_text,
+                            data,
+                            new_status,
+                            gr.update(elem_classes="completed-tab"),
+                            gr.update(interactive=True),
+                            gr.update(visible=False),
+                            gr.update(visible=False)
+                        )
                     except Exception as e:
+                        error_msg = f"Error processing transcript: {str(e)}"
                         if "PDF" in str(e):
+                            error_msg += "\n\nTIPS:\n- Try re-saving the PDF\n- Ensure it's not password protected\n- Try converting to an image"
                         return (
                             error_msg,
                             None,
             # Check if current tab is completed
             if not tab_completed_status.get(current_tab, False):
+                messages = {
+                    0: "Please complete the transcript analysis first.",
+                    1: "Please complete the learning style quiz first.",
+                    2: "Please fill out your personal information first.",
+                    3: "Please save your profile first."
+                }
                 return (
                     gr.Tabs(selected=current_tab),
+                    gr.update(
+                        value=f"<div class='error-message'>⚠️ {messages.get(current_tab, 'Please complete this step first')}</div>",
+                        visible=True
+                    )
                 )
             return gr.Tabs(selected=tab_index), gr.update(visible=False)