Krish30 commited on
Commit
e5c44c7
·
verified ·
1 Parent(s): d24497b

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +168 -0
app.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app1
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1-gnGd0dCkJpDPuHmih9Ev8b0E_mP0swv
8
+ """
9
+
10
+ ! pip install streamlit
11
+
12
+ ! pip install pypdf2
13
+
14
+ # Commented out IPython magic to ensure Python compatibility.
15
+ # %%writefile app.py
16
+ # import streamlit as st
17
+ # import PyPDF2
18
+ # import re
19
+ # import csv
20
+ # import base64
21
+ # import os # Import the os module for file operations
22
+ #
23
def extract_data_from_pdf(pdf_path, start_page=2):
    """Extract per-student result records from a marksheet PDF.

    Args:
        pdf_path: Path to the input PDF file.
        start_page: Zero-based index of the first page to parse. Defaults
            to 2 because the original hard-coded skip of the first two
            (cover) pages; parameterized so other layouts can reuse this
            without changing existing callers.

    Returns:
        A list with one dict per parsed page, as produced by
        ``singlePageData``.
    """
    data_list = []
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # One student record per page after the cover pages.
        for page_num in range(start_page, len(reader.pages)):
            single_page = reader.pages[page_num].extract_text()
            data_list.append(singlePageData(single_page))
    return data_list
32
+ #
33
def singlePageData(singlePage):
    """Parse one page of extracted PDF text into a student record dict.

    Pulls the seat number, PRN, name, per-semester subject marks
    (semesters 3 and 4), overall status and percentage out of the raw
    page text via regular expressions. Any field whose pattern does not
    match comes back as the empty string.
    """

    def first_group(pattern):
        # Return the first capture group of the first match, or "".
        match = re.search(pattern, singlePage)
        return match.group(1) if match else ""

    return {
        "Exam_Seat_No": first_group(r"Seat No:\s*([^\s]+)"),
        "PRN_No": first_group(r"PRN:\s*(\d+)"),
        "Name": first_group(r"Name:\s*([^\n]+)").strip(),
        "Sem3": semData(singlePage, 3),
        "Sem4": semData(singlePage, 4),
        "Status": first_group(r"\|Status:\s*(\w+)\s*\|C"),
        "Percentage": first_group(r"\|Percentage:\s*(\d+\.\d+)\s*\%"),
    }
65
+ #
66
def semData(singlePage, sem):
    """Collect subject-wise marks for one semester from page text.

    Matches table rows of the form
    ``BTN06<sem>... | <subject> | .. | .. | .. | <ESE> | <ISE>`` and
    returns ``{subject: {"ESE": ..., "ISE": ..., "Total": ...}}`` with
    every mark kept as a string (Total = ESE + ISE).
    """
    row_re = re.compile(
        fr"BTN06{sem}\d+\s*\|\s*(\S+)\s*\|\s*\S+\s*\|\s*\S+\s*\|\s*\S+\s*\|\s*(\d+)\s*\|\s*(\d+)"
    )
    marks = {}
    for subject, ese, ise in row_re.findall(singlePage):
        marks[subject] = {
            "ESE": ese,
            "ISE": ise,
            "Total": str(int(ese) + int(ise)),
        }
    return marks
81
+ #
82
def write_data_to_csv(data_list, output_path):
    """Flatten student records into a CSV, one row per subject.

    Args:
        data_list: List of record dicts as produced by ``singlePageData``
            (keys: Exam_Seat_No, PRN_No, Name, Sem3, Sem4, Status,
            Percentage).
        output_path: Destination CSV file path (overwritten).

    Bug fix: the original loop iterated only the Sem3 subject keys, so a
    subject present only in Sem4 was silently dropped and never written.
    Rows now cover the union of both semesters' subjects (Sem3 order
    first, then Sem4-only subjects), with empty marks for the semester
    that lacks the subject.
    """
    fieldnames = [
        "Exam_Seat_No", "PRN_No", "Name",
        "Sem3_Subject", "Sem3_ESE", "Sem3_ISE", "Sem3_Total",
        "Sem4_Subject", "Sem4_ESE", "Sem4_ISE", "Sem4_Total",
        "Status", "Percentage"
    ]
    empty_marks = {"ESE": "", "ISE": "", "Total": ""}

    with open(output_path, "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for student in data_list:
            sem3_data = student["Sem3"]
            sem4_data = student["Sem4"]

            # Union of subjects: Sem3 insertion order, then Sem4-only.
            subjects = list(sem3_data) + [
                s for s in sem4_data if s not in sem3_data
            ]

            for subject_code in subjects:
                sem3_marks = sem3_data.get(subject_code, empty_marks)
                sem4_marks = sem4_data.get(subject_code, empty_marks)

                writer.writerow({
                    "Exam_Seat_No": student["Exam_Seat_No"],
                    "PRN_No": student["PRN_No"],
                    "Name": student["Name"],
                    "Sem3_Subject": subject_code,
                    "Sem3_ESE": sem3_marks["ESE"],
                    "Sem3_ISE": sem3_marks["ISE"],
                    "Sem3_Total": sem3_marks["Total"],
                    "Sem4_Subject": subject_code,
                    "Sem4_ESE": sem4_marks["ESE"],
                    "Sem4_ISE": sem4_marks["ISE"],
                    "Sem4_Total": sem4_marks["Total"],
                    "Status": student["Status"],
                    "Percentage": student["Percentage"]
                })
116
+ #
117
def main():
    """Streamlit entry point: upload a PDF, convert it, offer a CSV download."""
    st.title("PDF to CSV Converter")

    # File upload section.
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
    if uploaded_file is None:
        return

    # Persist the upload to disk so the PDF reader can open it by path.
    input_pdf_path = save_uploaded_file(uploaded_file)

    try:
        # Extract data from the PDF and flatten it to CSV.
        data_list = extract_data_from_pdf(input_pdf_path)
        output_csv_path = "/tmp/output.csv"
        write_data_to_csv(data_list, output_csv_path)

        # Offer the result as a download link.
        st.success("PDF successfully processed!")
        st.markdown(
            get_binary_file_downloader_html(output_csv_path, "CSV"),
            unsafe_allow_html=True,
        )
    except Exception as e:
        st.error(f"Error encountered during PDF extraction: {str(e)}")
141
+ #
142
def save_uploaded_file(uploaded_file):
    """Write the uploaded PDF to /tmp/pdf_converter/input.pdf and return that path.

    *uploaded_file* only needs a ``.read()`` method returning bytes
    (Streamlit's UploadedFile satisfies this).
    """
    target_dir = "/tmp/pdf_converter"
    os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, "input.pdf")
    with open(target_path, "wb") as out:
        out.write(uploaded_file.read())
    return target_path
150
+ #
151
def get_binary_file_downloader_html(bin_file, file_label='File'):
    """Return an HTML anchor that downloads *bin_file* inlined as base64.

    The whole file is embedded in a ``data:`` URI, so this is only
    suitable for small files (the generated CSV here).
    """
    with open(bin_file, 'rb') as fh:
        payload = fh.read()
    encoded = base64.b64encode(payload).decode()
    filename = os.path.basename(bin_file)
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{filename}">Download {file_label}</a>'
    )
158
+ #
159
+ # if __name__ == "__main__":
160
+ # main()
161
+ #
162
+
163
+ !npm install localtunnel
164
+
165
+ !streamlit run app.py &>/content/logs.txt &
166
+
167
+ !npx localtunnel --port 8501 & curl ipv4.icanhazip.com
168
+