Krish30 committed on
Commit
4f5bb45
·
verified ·
1 Parent(s): ea2f8d4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +157 -0
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app2.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1EcIl8KoJxnisZgC7-76wSYIyis_eRKAL
8
+ """
9
+
10
+ # Commented out IPython magic to ensure Python compatibility.
11
+ # %%writefile app.py
12
+ # import streamlit as st
13
+ # import PyPDF2
14
+ # import re
15
+ # import csv
16
+ # import base64
17
+ # import os # Import the os module for file operations
18
+ #
19
+ # def extract_data_from_pdf(pdf_path):
20
+ # data_list = []
21
+ # with open(pdf_path, "rb") as file:
22
+ # reader = PyPDF2.PdfReader(file)
23
+ # for page_num in range(2, len(reader.pages)):
24
+ # single_page = reader.pages[page_num].extract_text()
25
+ # data = singlePageData(single_page)
26
+ # data_list.append(data)
27
+ # return data_list
28
+ #
29
+ # def singlePageData(singlePage):
30
+ # seat_no_pattern = re.compile(r"Seat No:\s*([^\s]+)")
31
+ # seat_match = seat_no_pattern.search(singlePage)
32
+ # seat_no = seat_match.group(1) if seat_match else ""
33
+ #
34
+ # prn_no_pattern = re.compile(r"PRN:\s*(\d+)")
35
+ # prn_no_match = prn_no_pattern.search(singlePage)
36
+ # prn_no = prn_no_match.group(1) if prn_no_match else ""
37
+ #
38
+ # name_pattern = re.compile(r"Name:\s*([^\n]+)")
39
+ # name_match = name_pattern.search(singlePage)
40
+ # name = name_match.group(1).strip() if name_match else ""
41
+ #
42
+ # sem3_data = semData(singlePage, 3)
43
+ # sem4_data = semData(singlePage, 4)
44
+ #
45
+ # overall_status_pattern = re.compile(r"\|Status:\s*(\w+)\s*\|C")
46
+ # overall_status_match = overall_status_pattern.search(singlePage)
47
+ # overall_status = overall_status_match.group(1) if overall_status_match else ""
48
+ #
49
+ # percentage_match = re.compile(r"\|Percentage:\s*(\d+\.\d+)\s*\%").search(singlePage)
50
+ # percentage = percentage_match.group(1) if percentage_match else ""
51
+ #
52
+ # return {
53
+ # "Exam_Seat_No": seat_no,
54
+ # "PRN_No": prn_no,
55
+ # "Name": name,
56
+ # "Sem3": sem3_data,
57
+ # "Sem4": sem4_data,
58
+ # "Status": overall_status,
59
+ # "Percentage": percentage,
60
+ # }
61
+ #
62
+ # def semData(singlePage, sem):
63
+ # data = {}
64
+ # subject_pattern = re.compile(fr"BTN06{sem}\d+\s*\|\s*(\S+)\s*\|\s*\S+\s*\|\s*\S+\s*\|\s*\S+\s*\|\s*(\d+)\s*\|\s*(\d+)")
65
+ # matches = subject_pattern.findall(singlePage)
66
+ # for match in matches:
67
+ # subject_code = match[0]
68
+ # ese_marks = match[1]
69
+ # ise_marks = match[2]
70
+ # total_marks = str(int(ese_marks) + int(ise_marks))
71
+ # data[subject_code] = {
72
+ # "ESE": ese_marks,
73
+ # "ISE": ise_marks,
74
+ # "Total": total_marks
75
+ # }
76
+ # return data
77
+ #
78
+ # def write_data_to_csv(data_list, output_path):
79
+ # fieldnames = [
80
+ # "Exam_Seat_No", "PRN_No", "Name",
81
+ # "Sem3_Subject", "Sem3_ESE", "Sem3_ISE", "Sem3_Total",
82
+ # "Sem4_Subject", "Sem4_ESE", "Sem4_ISE", "Sem4_Total",
83
+ # "Status", "Percentage"
84
+ # ]
85
+ #
86
+ # with open(output_path, "w", newline="") as csvfile:
87
+ # writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
88
+ # writer.writeheader()
89
+ #
90
+ # for student in data_list:
91
+ # sem3_data = student["Sem3"]
92
+ # sem4_data = student["Sem4"]
93
+ #
94
+ # for subject_code, sem3_marks in sem3_data.items():
95
+ # sem4_marks = sem4_data.get(subject_code, {"ESE": "", "ISE": "", "Total": ""})
96
+ #
97
+ # writer.writerow({
98
+ # "Exam_Seat_No": student["Exam_Seat_No"],
99
+ # "PRN_No": student["PRN_No"],
100
+ # "Name": student["Name"],
101
+ # "Sem3_Subject": subject_code,
102
+ # "Sem3_ESE": sem3_marks["ESE"],
103
+ # "Sem3_ISE": sem3_marks["ISE"],
104
+ # "Sem3_Total": sem3_marks["Total"],
105
+ # "Sem4_Subject": subject_code,
106
+ # "Sem4_ESE": sem4_marks["ESE"],
107
+ # "Sem4_ISE": sem4_marks["ISE"],
108
+ # "Sem4_Total": sem4_marks["Total"],
109
+ # "Status": student["Status"],
110
+ # "Percentage": student["Percentage"]
111
+ # })
112
+ #
113
+ # def main():
114
+ # st.title("PDF to CSV Converter")
115
+ #
116
+ # # File upload section
117
+ # uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
118
+ #
119
+ # if uploaded_file is not None:
120
+ # # Save the uploaded PDF file to a temporary location
121
+ # input_pdf_path = save_uploaded_file(uploaded_file)
122
+ #
123
+ # try:
124
+ # # Extract data from the PDF
125
+ # data_list = extract_data_from_pdf(input_pdf_path)
126
+ #
127
+ # # Save extracted data to CSV
128
+ # output_csv_path = "/tmp/output.csv"
129
+ # write_data_to_csv(data_list, output_csv_path)
130
+ #
131
+ # # Provide download link for the CSV file
132
+ # st.success("PDF successfully processed!")
133
+ # st.markdown(get_binary_file_downloader_html(output_csv_path, "CSV"), unsafe_allow_html=True)
134
+ #
135
+ # except Exception as e:
136
+ # st.error(f"Error encountered during PDF extraction: {str(e)}")
137
+ #
138
+ # def save_uploaded_file(uploaded_file):
139
+ # # Save the uploaded PDF file to a temporary location
140
+ # temp_dir = "/tmp/pdf_converter"
141
+ # os.makedirs(temp_dir, exist_ok=True)
142
+ # input_pdf_path = os.path.join(temp_dir, "input.pdf")
143
+ # with open(input_pdf_path, "wb") as f:
144
+ # f.write(uploaded_file.read())
145
+ # return input_pdf_path
146
+ #
147
+ # def get_binary_file_downloader_html(bin_file, file_label='File'):
148
+ # # Generate download link for the CSV file
149
+ # with open(bin_file, 'rb') as f:
150
+ # data = f.read()
151
+ # b64 = base64.b64encode(data).decode()
152
+ # href = f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(bin_file)}">Download {file_label}</a>'
153
+ # return href
154
+ #
155
+ # if __name__ == "__main__":
156
+ # main()
157
+ #