Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,6 @@ from PyPDF2 import PdfReader
|
|
| 7 |
from collections import defaultdict
|
| 8 |
|
| 9 |
# ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
|
| 10 |
-
|
| 11 |
def extract_courses_with_grade_levels(text):
|
| 12 |
grade_level_pattern = r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)"
|
| 13 |
grade_match = re.search(grade_level_pattern, text, re.IGNORECASE)
|
|
@@ -51,12 +50,24 @@ def parse_transcript(file):
|
|
| 51 |
text = ''
|
| 52 |
reader = PdfReader(file)
|
| 53 |
for page in reader.pages:
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
courses_by_grade = extract_courses_with_grade_levels(text)
|
| 56 |
|
| 57 |
-
output_text = "
|
| 58 |
-
for
|
| 59 |
-
output_text += f"Grade {
|
| 60 |
for course in courses:
|
| 61 |
output_text += f"- {course['course']}"
|
| 62 |
if 'grade' in course:
|
|
@@ -64,10 +75,16 @@ def parse_transcript(file):
|
|
| 64 |
output_text += "\n"
|
| 65 |
output_text += "\n"
|
| 66 |
|
| 67 |
-
return output_text,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
else:
|
| 69 |
return "Unsupported file format", None
|
| 70 |
|
|
|
|
| 71 |
gpa = "N/A"
|
| 72 |
for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
|
| 73 |
if col in df.columns:
|
|
|
|
| 7 |
from collections import defaultdict
|
| 8 |
|
| 9 |
# ========== TRANSCRIPT PARSING FUNCTIONS (UPDATED) ==========
|
|
|
|
| 10 |
def extract_courses_with_grade_levels(text):
|
| 11 |
grade_level_pattern = r"(Grade|Year)\s*[:]?\s*(\d+|Freshman|Sophomore|Junior|Senior)"
|
| 12 |
grade_match = re.search(grade_level_pattern, text, re.IGNORECASE)
|
|
|
|
| 50 |
text = ''
|
| 51 |
reader = PdfReader(file)
|
| 52 |
for page in reader.pages:
|
| 53 |
+
page_text = page.extract_text()
|
| 54 |
+
if page_text:
|
| 55 |
+
text += page_text + '\n'
|
| 56 |
+
|
| 57 |
+
# GPA extraction
|
| 58 |
+
gpa_match = re.search(r'GPA[:\s]*(\d\.\d{1,2})', text, re.IGNORECASE)
|
| 59 |
+
gpa = gpa_match.group(1) if gpa_match else "N/A"
|
| 60 |
+
|
| 61 |
+
# Grade level extraction
|
| 62 |
+
grade_match = re.search(r'(Grade|Year)[\s:]*(\d+|Freshman|Sophomore|Junior|Senior)', text, re.IGNORECASE)
|
| 63 |
+
grade_level = grade_match.group(2) if grade_match else "Unknown"
|
| 64 |
+
|
| 65 |
+
# Courses grouped by grade
|
| 66 |
courses_by_grade = extract_courses_with_grade_levels(text)
|
| 67 |
|
| 68 |
+
output_text = f"Grade Level: {grade_level}\nGPA: {gpa}\n\nCourses by Grade Level:\n\n"
|
| 69 |
+
for level, courses in courses_by_grade.items():
|
| 70 |
+
output_text += f"Grade {level}:\n"
|
| 71 |
for course in courses:
|
| 72 |
output_text += f"- {course['course']}"
|
| 73 |
if 'grade' in course:
|
|
|
|
| 75 |
output_text += "\n"
|
| 76 |
output_text += "\n"
|
| 77 |
|
| 78 |
+
return output_text, {
|
| 79 |
+
"gpa": gpa,
|
| 80 |
+
"grade_level": grade_level,
|
| 81 |
+
"courses": courses_by_grade
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
else:
|
| 85 |
return "Unsupported file format", None
|
| 86 |
|
| 87 |
+
# For CSV/XLSX:
|
| 88 |
gpa = "N/A"
|
| 89 |
for col in ['GPA', 'Grade Point Average', 'Cumulative GPA']:
|
| 90 |
if col in df.columns:
|