# -*- coding: utf-8 -*-
"""app2.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1EcIl8KoJxnisZgC7-76wSYIyis_eRKAL

Streamlit app that reads a result PDF, extracts per-student data from each
page with regular expressions, and offers the result as a CSV download.
"""

# NOTE: this code came from a Colab ``%%writefile app.py`` cell, which the
# notebook exporter comments out wholesale. It is restored here as regular,
# importable Python.
# %%writefile app.py

import base64
import csv
import html
import os
import re
import shutil
import tempfile

# Patterns are compiled once at import time instead of once per page.
_SEAT_NO_RE = re.compile(r"Seat No:\s*([^\s]+)")
_PRN_RE = re.compile(r"PRN:\s*(\d+)")
_NAME_RE = re.compile(r"Name:\s*([^\n]+)")
_STATUS_RE = re.compile(r"\|Status:\s*(\w+)\s*\|C")
_PERCENTAGE_RE = re.compile(r"\|Percentage:\s*(\d+\.\d+)\s*\%")

# Column order of the generated CSV.
_CSV_FIELDNAMES = [
    "Exam_Seat_No", "PRN_No", "Name",
    "Sem3_Subject", "Sem3_ESE", "Sem3_ISE", "Sem3_Total",
    "Sem4_Subject", "Sem4_ESE", "Sem4_ISE", "Sem4_Total",
    "Status", "Percentage",
]

# Placeholder used when a Sem3 subject key has no entry in the Sem4 data.
_EMPTY_MARKS = {"ESE": "", "ISE": "", "Total": ""}


def _first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or ""."""
    match = pattern.search(text)
    return match.group(1) if match else ""


def extract_data_from_pdf(pdf_path, skip_pages=2):
    """Parse every page of a PDF from index *skip_pages* onwards.

    Args:
        pdf_path: Path of the PDF file to read.
        skip_pages: Number of leading pages to ignore. Defaults to 2, the
            value that was previously hard-coded.

    Returns:
        A list with one ``singlePageData`` dict per processed page.
    """
    # Imported lazily so the pure-text helpers in this module can be used
    # (and tested) without PyPDF2 installed.
    import PyPDF2

    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        return [
            # Defensive: treat a missing/empty extraction result as an
            # empty page so the regex searches always receive a string.
            singlePageData(reader.pages[page_num].extract_text() or "")
            for page_num in range(skip_pages, len(reader.pages))
        ]


def singlePageData(singlePage):
    """Extract one record from the text of a single page.

    Args:
        singlePage: Text of one PDF page.

    Returns:
        A dict with keys ``Exam_Seat_No``, ``PRN_No``, ``Name``, ``Sem3``,
        ``Sem4``, ``Status`` and ``Percentage``. Scalar fields that are not
        found in the text are empty strings; ``Sem3``/``Sem4`` are the dicts
        returned by ``semData`` for semesters 3 and 4.
    """
    return {
        "Exam_Seat_No": _first_group(_SEAT_NO_RE, singlePage),
        "PRN_No": _first_group(_PRN_RE, singlePage),
        "Name": _first_group(_NAME_RE, singlePage).strip(),
        "Sem3": semData(singlePage, 3),
        "Sem4": semData(singlePage, 4),
        "Status": _first_group(_STATUS_RE, singlePage),
        "Percentage": _first_group(_PERCENTAGE_RE, singlePage),
    }


def semData(singlePage, sem):
    """Collect the marks rows of one semester from a page's text.

    A row is recognised by a ``BTN06<sem><digits>`` code followed by
    ``|``-separated columns; the first column after the code is used as the
    dictionary key and the last two numeric columns as ESE and ISE marks.

    Args:
        singlePage: Text of one PDF page.
        sem: Semester number interpolated into the course-code pattern.

    Returns:
        A dict mapping the captured key to
        ``{"ESE": str, "ISE": str, "Total": str}`` where ``Total`` is the
        integer sum of ESE and ISE. If a key occurs more than once, the last
        occurrence wins.
    """
    subject_pattern = re.compile(
        fr"BTN06{sem}\d+\s*\|\s*(\S+)\s*\|\s*\S+\s*\|\s*\S+\s*\|\s*\S+\s*\|\s*(\d+)\s*\|\s*(\d+)"
    )
    return {
        subject_code: {
            "ESE": ese_marks,
            "ISE": ise_marks,
            "Total": str(int(ese_marks) + int(ise_marks)),
        }
        for subject_code, ese_marks, ise_marks in subject_pattern.findall(singlePage)
    }


def write_data_to_csv(data_list, output_path):
    """Write the extracted records to a CSV file.

    One row is written per Sem3 subject of each student; the Sem4 columns of
    that row are filled from the Sem4 entry with the same key, or left blank
    when there is none.

    NOTE(review): Sem4 entries whose key does not occur in Sem3, and students
    without any Sem3 entry, produce no row. That matches the previous
    behaviour; confirm against real result sheets whether it is intended.

    Args:
        data_list: Iterable of dicts as returned by ``singlePageData``.
        output_path: Destination path of the CSV file (overwritten).
    """
    # Explicit encoding so non-ASCII names do not depend on the platform
    # default.
    with open(output_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=_CSV_FIELDNAMES)
        writer.writeheader()

        for student in data_list:
            sem4_data = student["Sem4"]
            for subject_code, sem3_marks in student["Sem3"].items():
                sem4_marks = sem4_data.get(subject_code, _EMPTY_MARKS)
                writer.writerow({
                    "Exam_Seat_No": student["Exam_Seat_No"],
                    "PRN_No": student["PRN_No"],
                    "Name": student["Name"],
                    "Sem3_Subject": subject_code,
                    "Sem3_ESE": sem3_marks["ESE"],
                    "Sem3_ISE": sem3_marks["ISE"],
                    "Sem3_Total": sem3_marks["Total"],
                    "Sem4_Subject": subject_code,
                    "Sem4_ESE": sem4_marks["ESE"],
                    "Sem4_ISE": sem4_marks["ISE"],
                    "Sem4_Total": sem4_marks["Total"],
                    "Status": student["Status"],
                    "Percentage": student["Percentage"],
                })


def main():
    """Run the Streamlit UI: upload a PDF, convert it, offer the CSV."""
    # Imported lazily so importing this module does not require Streamlit.
    import streamlit as st

    st.title("PDF to CSV Converter")

    # File upload section
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
    if uploaded_file is None:
        return

    # Each upload gets its own scratch directory (see save_uploaded_file),
    # so concurrent sessions cannot overwrite each other's files.
    input_pdf_path = save_uploaded_file(uploaded_file)
    work_dir = os.path.dirname(input_pdf_path)

    try:
        data_list = extract_data_from_pdf(input_pdf_path)

        output_csv_path = os.path.join(work_dir, "output.csv")
        write_data_to_csv(data_list, output_csv_path)

        st.success("PDF successfully processed!")
        st.markdown(
            get_binary_file_downloader_html(output_csv_path, "CSV"),
            unsafe_allow_html=True,
        )
    except Exception as e:  # UI boundary: report the failure to the user.
        st.error(f"Error encountered during PDF extraction: {str(e)}")
    finally:
        # The download link embeds the CSV bytes, so the files on disk are
        # no longer needed once the markup has been generated.
        shutil.rmtree(work_dir, ignore_errors=True)


def save_uploaded_file(uploaded_file):
    """Store an uploaded file on disk and return its path.

    Args:
        uploaded_file: Object with a ``read()`` method returning bytes.

    Returns:
        Path of ``input.pdf`` inside a newly created temporary directory.
        The caller is responsible for removing that directory.
    """
    temp_dir = tempfile.mkdtemp(prefix="pdf_converter_")
    input_pdf_path = os.path.join(temp_dir, "input.pdf")
    with open(input_pdf_path, "wb") as f:
        f.write(uploaded_file.read())
    return input_pdf_path


def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Build an HTML download link that embeds a file's contents.

    Args:
        bin_file: Path of the file to offer for download.
        file_label: Text appended to "Download " as the link caption.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI of the
        file and whose ``download`` attribute is the file's base name.
    """
    with open(bin_file, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode()
    # Escape anything interpolated into markup rendered with
    # unsafe_allow_html=True.
    file_name = html.escape(os.path.basename(bin_file), quote=True)
    label = html.escape(str(file_label))
    return (
        f'<a href="data:application/octet-stream;base64,{b64}" '
        f'download="{file_name}">Download {label}</a>'
    )


if __name__ == "__main__":
    main()