Krish30 committed on
Commit
52abe54
·
verified ·
1 Parent(s): 414c139

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # V5 engg upload
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ import tabula
6
+ import os
7
+ from io import BytesIO
8
+
9
+ # Engineering Result Type 1 Functions
10
def extract_engineering_result(pdf_path):
    """Read the tables of an Engineering result PDF with tabula.

    Args:
        pdf_path: PDF source passed unchanged to ``tabula.read_pdf``.

    Returns:
        The result of ``tabula.read_pdf`` over all pages with
        ``multiple_tables=True``, or ``None`` if extraction raised (the
        error is shown in the Streamlit UI).
    """
    try:
        tables = tabula.read_pdf(pdf_path, pages='all', multiple_tables=True)
    except Exception as exc:
        # Surface the failure in the UI rather than crashing the app.
        st.error(f"Error extracting data from Engineering PDF: {exc}")
        return None
    return tables
17
+
18
+ # HSC Result Function
19
def extract_hsc_result(pdf_path):
    """Read the tables of an HSC result PDF with tabula.

    Args:
        pdf_path: PDF source passed unchanged to ``tabula.read_pdf``.

    Returns:
        The result of ``tabula.read_pdf`` over all pages, or ``None`` if
        extraction raised (the error is shown in the Streamlit UI).
    """
    try:
        tables = tabula.read_pdf(pdf_path, pages='all')
    except Exception as exc:
        # Surface the failure in the UI rather than crashing the app.
        st.error(f"Error extracting data from HSC PDF: {exc}")
        return None
    return tables
26
+
27
+ # Diploma Result Function
28
def extract_diploma_result(pdf_path):
    """Read the tables of a Diploma result PDF with tabula.

    Args:
        pdf_path: PDF source passed unchanged to ``tabula.read_pdf``.

    Returns:
        The result of ``tabula.read_pdf`` over all pages, or ``None`` if
        extraction raised (the error is shown in the Streamlit UI).
    """
    try:
        tables = tabula.read_pdf(pdf_path, pages='all')
    except Exception as exc:
        # Surface the failure in the UI rather than crashing the app.
        st.error(f"Error extracting data from Diploma PDF: {exc}")
        return None
    return tables
35
+
36
+ # Streamlit App
37
def _select_extractor(filename):
    """Return the extraction function matching *filename*, or ``None``.

    Matching is a case-insensitive substring test, checked in the order
    engineering/engg, hsc, diploma.
    """
    lowered = filename.lower()
    # Built inside the function so the extractors are looked up at call time.
    routes = (
        (("engineering", "engg"), extract_engineering_result),
        (("hsc",), extract_hsc_result),
        (("diploma",), extract_diploma_result),
    )
    for keywords, extractor in routes:
        if any(keyword in lowered for keyword in keywords):
            return extractor
    return None


def _combine_tables(extracted):
    """Collapse an extraction result into one DataFrame, or ``None``.

    Accepts either a single DataFrame or a list of DataFrames. An empty
    list yields ``None`` because ``pd.concat`` raises ``ValueError`` when
    given no objects.
    """
    if isinstance(extracted, pd.DataFrame):
        return extracted
    if isinstance(extracted, list) and extracted:
        return pd.concat(extracted, ignore_index=True)
    return None


def _to_excel_bytes(frame):
    """Serialize *frame* to an in-memory .xlsx workbook and return its bytes."""
    buffer = BytesIO()
    # The context manager finalizes the workbook even if to_excel raises.
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
        frame.to_excel(writer, index=False, sheet_name='Sheet1')
    return buffer.getvalue()


def main():
    """Run the Streamlit app: upload a PDF, extract its tables, offer Excel.

    The extraction function is chosen from keywords in the uploaded file's
    name. All extracted tables are combined into one DataFrame, displayed,
    and made downloadable as an .xlsx file on request.
    """
    st.title("PDF Result Converter")

    # File Upload
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type}
    st.write(file_details)

    # Determine which type of PDF and call the appropriate extraction function
    extractor = _select_extractor(uploaded_file.name)
    if extractor is None:
        st.error("Unsupported PDF type. Please upload a valid PDF.")
        return
    extracted_data = extractor(uploaded_file)

    # Combine all extracted tables; None covers a failed extraction, an
    # unexpected result type, and a PDF in which no tables were found.
    combined_df = _combine_tables(extracted_data)
    if combined_df is None:
        st.error("No data extracted or extraction failed. Please check the PDF file and extraction logic.")
        return

    # Display the extracted data (for debugging purposes)
    st.subheader("Combined Extracted Data:")
    st.write(combined_df)

    # Convert to Excel and create download link
    if st.button("Convert to Excel"):
        excel_data = _to_excel_bytes(combined_df)

        # splitext drops only the final extension, so a name such as
        # "result.2023.pdf" becomes "result.2023.xlsx".
        base_name = os.path.splitext(uploaded_file.name)[0]

        # Provide a download button for the generated Excel file
        st.download_button(
            label="Download Excel File",
            data=excel_data,
            file_name=f"{base_name}.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            key="download_excel",
        )
88
+
89
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()