Katie-Ch committed on
Commit
71111d9
·
verified ·
1 Parent(s): 05e31ae

Update verifier.py

Browse files
Files changed (1) hide show
  1. verifier.py +39 -0
verifier.py CHANGED
@@ -59,6 +59,45 @@ def verifier_page():
59
  # Update the session state with the selected file details
60
  st.session_state.selected_file = row['filename']
61
  st.session_state.selected_text = row['text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  # Configure AI (this could be dynamic depending on how your setup works)
64
  GOOGLE_API_KEY = "AIzaSyC7TpzrIH_3-dppWE8exqdZX3DAdE6cy8w"
 
59
  # Update the session state with the selected file details
60
  st.session_state.selected_file = row['filename']
61
  st.session_state.selected_text = row['text']
62
+
63
+
64
def extract_text_from_pdf(uploaded_file, start_page, end_page):
    """Return the concatenated text of a range of pages from a PDF.

    Args:
        uploaded_file: The PDF source, passed straight to
            ``PyPDF2.PdfReader``. ``None`` means "nothing uploaded".
        start_page: 0-based index of the first page to extract. If it is
            negative or past the last page it is reset to 0.
        end_page: 0-based index of the last page to extract (inclusive).
            If it is before ``start_page`` or past the last page it is
            reset to the last page of the document.

    Returns:
        The extracted text of the selected pages joined together with no
        separator, or an empty string when ``uploaded_file`` is ``None``
        or the document has no pages.
    """
    if uploaded_file is None:
        return ""  # Nothing uploaded, nothing to extract.

    reader = PyPDF2.PdfReader(uploaded_file)
    pages = reader.pages
    num_pages = len(pages)

    # Fall back to the widest sensible range instead of raising on
    # out-of-bounds indices.
    if start_page < 0 or start_page >= num_pages:
        start_page = 0
    if end_page < start_page or end_page >= num_pages:
        end_page = num_pages - 1

    # Join once instead of growing a string with ``+=`` per page; the
    # ``or ""`` keeps a page that yields no text from breaking the join.
    return "".join(
        pages[page_num].extract_text() or ""
        for page_num in range(start_page, end_page + 1)
    )
82
+
83
+
84
# Load the reference documents as plain text. Page indices are 0-based and
# the end index is inclusive; extract_text_from_pdf resets an end index past
# the last page to the document's final page.

# VCS Standard: page indices 0 through 93.
pdf_path = 'VCS-Standard.pdf'
start_page = 0  # First page to extract (0-based index)
end_page = 93  # Last page to extract (0-based index, inclusive)
vcs_text = extract_text_from_pdf(pdf_path, start_page, end_page)
print(vcs_text)

# VCS Methodology Requirements: page indices 0 through 89.
pdf_path = 'VCS-Methodology-Requirements.pdf'
start_page = 0  # First page to extract (0-based index)
end_page = 89  # Last page to extract (0-based index, inclusive)
methodology_text = extract_text_from_pdf(pdf_path, start_page, end_page)
print(methodology_text)

# VCS Project Description Template: page indices 0 through 34.
pdf_path = 'VCS-Project-Description-Template-v4.4-FINAL2.docx.pdf'
start_page = 0  # First page to extract (0-based index)
end_page = 34  # Last page to extract (0-based index, inclusive)
template_text = extract_text_from_pdf(pdf_path, start_page, end_page)
print(template_text)
101
 
102
  # Configure AI (this could be dynamic depending on how your setup works)
103
  GOOGLE_API_KEY = "AIzaSyC7TpzrIH_3-dppWE8exqdZX3DAdE6cy8w"