Update verifier.py
Browse files- verifier.py +39 -0
verifier.py
CHANGED
|
@@ -59,6 +59,45 @@ def verifier_page():
|
|
| 59 |
# Update the session state with the selected file details
|
| 60 |
st.session_state.selected_file = row['filename']
|
| 61 |
st.session_state.selected_text = row['text']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# Configure AI (this could be dynamic depending on how your setup works)
|
| 64 |
GOOGLE_API_KEY = "AIzaSyC7TpzrIH_3-dppWE8exqdZX3DAdE6cy8w"
|
|
|
|
| 59 |
# Update the session state with the selected file details
|
| 60 |
st.session_state.selected_file = row['filename']
|
| 61 |
st.session_state.selected_text = row['text']
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def extract_text_from_pdf(uploaded_file, start_page, end_page):
    """Return the concatenated text of an inclusive, 0-based page range of a PDF.

    Args:
        uploaded_file: The PDF source handed straight to ``PyPDF2.PdfReader``,
            or ``None`` when nothing was supplied.
        start_page: 0-based index of the first page to read. A value that is
            negative or past the last page is replaced by 0.
        end_page: 0-based index of the last page to read (inclusive). A value
            before ``start_page`` or past the last page is replaced by the
            index of the last page.

    Returns:
        The text of every page in the range joined without a separator, or
        ``""`` when ``uploaded_file`` is ``None``.
    """
    if uploaded_file is None:
        return ""  # Nothing to read.

    reader = PyPDF2.PdfReader(uploaded_file)
    num_pages = len(reader.pages)

    # Clamp the requested range to the document. The end check deliberately
    # uses the already-clamped start_page.
    if start_page < 0 or start_page >= num_pages:
        start_page = 0
    if end_page < start_page or end_page >= num_pages:
        end_page = num_pages - 1

    # Join once instead of growing a string with += in a loop. The `or ""`
    # is defensive: a page that yields no text must not break concatenation.
    return "".join(
        reader.pages[page_num].extract_text() or ""
        for page_num in range(start_page, end_page + 1)
    )
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# Extract the text of the three reference PDFs. Page indices are 0-based and
# end_page is inclusive; extract_text_from_pdf falls back to the last page of
# the document when end_page lies past the end.
pdf_path = 'VCS-Standard.pdf'
start_page = 0  # Start extracting from the first page (0-based index)
end_page = 93  # Extract through page index 93 (0-based, inclusive)
vcs_text = extract_text_from_pdf(pdf_path, start_page, end_page)
print(vcs_text)

pdf_path = 'VCS-Methodology-Requirements.pdf'
start_page = 0  # Start extracting from the first page (0-based index)
end_page = 89  # Extract through page index 89 (0-based, inclusive)
methodology_text = extract_text_from_pdf(pdf_path, start_page, end_page)
print(methodology_text)

pdf_path = 'VCS-Project-Description-Template-v4.4-FINAL2.docx.pdf'
start_page = 0  # Start extracting from the first page (0-based index)
end_page = 34  # Extract through page index 34 (0-based, inclusive)
template_text = extract_text_from_pdf(pdf_path, start_page, end_page)
print(template_text)
|
| 101 |
|
| 102 |
# Configure AI (this could be dynamic depending on how your setup works)
|
| 103 |
GOOGLE_API_KEY = "AIzaSyC7TpzrIH_3-dppWE8exqdZX3DAdE6cy8w"
|