Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| """app2.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1EcIl8KoJxnisZgC7-76wSYIyis_eRKAL | |
| """ | |
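| # Commented out IPython magic to ensure Python compatibility. | |
| # NOTE: the whole cell below, starting at the `%%writefile app.py` magic, was commented out on export, so none of the Streamlit app code that follows is executed as written; uncomment the cell body (everything after the magic line) to run it as app.py. | |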
| # %%writefile app.py | |
| # import streamlit as st | |
| # import PyPDF2 | |
| # import re | |
| # import csv | |
| # import base64 | |
| # import os # Import the os module for file operations | |
| # | |
| # def extract_data_from_pdf(pdf_path): | |
| # data_list = [] | |
| # with open(pdf_path, "rb") as file: | |
| # reader = PyPDF2.PdfReader(file) | |
| # for page_num in range(2, len(reader.pages)): | |
| # single_page = reader.pages[page_num].extract_text() | |
| # data = singlePageData(single_page) | |
| # data_list.append(data) | |
| # return data_list | |
| # | |
| # def singlePageData(singlePage): | |
| # seat_no_pattern = re.compile(r"Seat No:\s*([^\s]+)") | |
| # seat_match = seat_no_pattern.search(singlePage) | |
| # seat_no = seat_match.group(1) if seat_match else "" | |
| # | |
| # prn_no_pattern = re.compile(r"PRN:\s*(\d+)") | |
| # prn_no_match = prn_no_pattern.search(singlePage) | |
| # prn_no = prn_no_match.group(1) if prn_no_match else "" | |
| # | |
| # name_pattern = re.compile(r"Name:\s*([^\n]+)") | |
| # name_match = name_pattern.search(singlePage) | |
| # name = name_match.group(1).strip() if name_match else "" | |
| # | |
| # sem3_data = semData(singlePage, 3) | |
| # sem4_data = semData(singlePage, 4) | |
| # | |
| # overall_status_pattern = re.compile(r"\|Status:\s*(\w+)\s*\|C") | |
| # overall_status_match = overall_status_pattern.search(singlePage) | |
| # overall_status = overall_status_match.group(1) if overall_status_match else "" | |
| # | |
| # percentage_match = re.compile(r"\|Percentage:\s*(\d+\.\d+)\s*\%").search(singlePage) | |
| # percentage = percentage_match.group(1) if percentage_match else "" | |
| # | |
| # return { | |
| # "Exam_Seat_No": seat_no, | |
| # "PRN_No": prn_no, | |
| # "Name": name, | |
| # "Sem3": sem3_data, | |
| # "Sem4": sem4_data, | |
| # "Status": overall_status, | |
| # "Percentage": percentage, | |
| # } | |
| # | |
| # def semData(singlePage, sem): | |
| # data = {} | |
| # subject_pattern = re.compile(fr"BTN06{sem}\d+\s*\|\s*(\S+)\s*\|\s*\S+\s*\|\s*\S+\s*\|\s*\S+\s*\|\s*(\d+)\s*\|\s*(\d+)") | |
| # matches = subject_pattern.findall(singlePage) | |
| # for match in matches: | |
| # subject_code = match[0] | |
| # ese_marks = match[1] | |
| # ise_marks = match[2] | |
| # total_marks = str(int(ese_marks) + int(ise_marks)) | |
| # data[subject_code] = { | |
| # "ESE": ese_marks, | |
| # "ISE": ise_marks, | |
| # "Total": total_marks | |
| # } | |
| # return data | |
| # | |
| # def write_data_to_csv(data_list, output_path): | |
| # fieldnames = [ | |
| # "Exam_Seat_No", "PRN_No", "Name", | |
| # "Sem3_Subject", "Sem3_ESE", "Sem3_ISE", "Sem3_Total", | |
| # "Sem4_Subject", "Sem4_ESE", "Sem4_ISE", "Sem4_Total", | |
| # "Status", "Percentage" | |
| # ] | |
| # | |
| # with open(output_path, "w", newline="") as csvfile: | |
| # writer = csv.DictWriter(csvfile, fieldnames=fieldnames) | |
| # writer.writeheader() | |
| # | |
| # for student in data_list: | |
| # sem3_data = student["Sem3"] | |
| # sem4_data = student["Sem4"] | |
| # | |
| # for subject_code, sem3_marks in sem3_data.items(): | |
| # sem4_marks = sem4_data.get(subject_code, {"ESE": "", "ISE": "", "Total": ""}) | |
| # | |
| # writer.writerow({ | |
| # "Exam_Seat_No": student["Exam_Seat_No"], | |
| # "PRN_No": student["PRN_No"], | |
| # "Name": student["Name"], | |
| # "Sem3_Subject": subject_code, | |
| # "Sem3_ESE": sem3_marks["ESE"], | |
| # "Sem3_ISE": sem3_marks["ISE"], | |
| # "Sem3_Total": sem3_marks["Total"], | |
| # "Sem4_Subject": subject_code, | |
| # "Sem4_ESE": sem4_marks["ESE"], | |
| # "Sem4_ISE": sem4_marks["ISE"], | |
| # "Sem4_Total": sem4_marks["Total"], | |
| # "Status": student["Status"], | |
| # "Percentage": student["Percentage"] | |
| # }) | |
| # | |
| # def main(): | |
| # st.title("PDF to CSV Converter") | |
| # | |
| # # File upload section | |
| # uploaded_file = st.file_uploader("Upload a PDF file", type="pdf") | |
| # | |
| # if uploaded_file is not None: | |
| # # Save the uploaded PDF file to a temporary location | |
| # input_pdf_path = save_uploaded_file(uploaded_file) | |
| # | |
| # try: | |
| # # Extract data from the PDF | |
| # data_list = extract_data_from_pdf(input_pdf_path) | |
| # | |
| # # Save extracted data to CSV | |
| # output_csv_path = "/tmp/output.csv" | |
| # write_data_to_csv(data_list, output_csv_path) | |
| # | |
| # # Provide download link for the CSV file | |
| # st.success("PDF successfully processed!") | |
| # st.markdown(get_binary_file_downloader_html(output_csv_path, "CSV"), unsafe_allow_html=True) | |
| # | |
| # except Exception as e: | |
| # st.error(f"Error encountered during PDF extraction: {str(e)}") | |
| # | |
| # def save_uploaded_file(uploaded_file): | |
| # # Write the uploaded file's bytes to the fixed path /tmp/pdf_converter/input.pdf (created if missing, overwritten on every upload) and return that path | |
| # temp_dir = "/tmp/pdf_converter" | |
| # os.makedirs(temp_dir, exist_ok=True) | |
| # input_pdf_path = os.path.join(temp_dir, "input.pdf") | |
| # with open(input_pdf_path, "wb") as f: | |
| # f.write(uploaded_file.read()) | |
| # return input_pdf_path | |
| # | |
| # def get_binary_file_downloader_html(bin_file, file_label='File'): | |
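| # # Build an HTML anchor tag that embeds the file's bytes as a base64 data URI, so the browser offers it as a download named after the file's basename | |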
| # with open(bin_file, 'rb') as f: | |
| # data = f.read() | |
| # b64 = base64.b64encode(data).decode() | |
| # href = f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(bin_file)}">Download {file_label}</a>' | |
| # return href | |
| # | |
| # if __name__ == "__main__": | |
| # main() | |
| # |