Update verifier.py
Browse files- verifier.py +57 -63
verifier.py
CHANGED
|
@@ -13,7 +13,7 @@ def verifier_page():
|
|
| 13 |
|
| 14 |
# Create a Firestore client
|
| 15 |
db = firestore.client()
|
| 16 |
-
|
| 17 |
# Fetch and display submissions
|
| 18 |
def fetch_data(selected_fields):
|
| 19 |
docs = db.collection('pdf_uploads').stream()
|
|
@@ -25,17 +25,9 @@ def verifier_page():
|
|
| 25 |
return data
|
| 26 |
|
| 27 |
fields_to_fetch = ['filename', 'pdf_url', 'text']
|
| 28 |
-
|
| 29 |
-
# Initialize session_state attributes if not present
|
| 30 |
-
if 'selected_file' not in st.session_state:
|
| 31 |
-
st.session_state.selected_file = None
|
| 32 |
-
if 'selected_text' not in st.session_state:
|
| 33 |
-
st.session_state.selected_text = ""
|
| 34 |
-
if 'selected_ai' not in st.session_state:
|
| 35 |
-
st.session_state.selected_ai = ""
|
| 36 |
-
|
| 37 |
# Check if a file has already been selected and displayed
|
| 38 |
-
if st.session_state
|
| 39 |
# When a file is selected, clear everything and display the details only
|
| 40 |
st.empty() # Clear all content before displaying details
|
| 41 |
st.title(f"Selected File: {st.session_state.selected_file}")
|
|
@@ -49,7 +41,7 @@ def verifier_page():
|
|
| 49 |
|
| 50 |
# Display the DataFrame
|
| 51 |
st.dataframe(df)
|
| 52 |
-
|
| 53 |
# Add buttons for each row in the DataFrame
|
| 54 |
for index, row in df.iterrows():
|
| 55 |
col1, col2 = st.columns([4, 1])
|
|
@@ -60,62 +52,64 @@ def verifier_page():
|
|
| 60 |
# Button to view details of each row
|
| 61 |
button_key = f"view_{index}"
|
| 62 |
if st.button("See details", key=button_key):
|
| 63 |
-
# Remove all existing content
|
| 64 |
-
st.empty() # Clear all the existing content from the page
|
| 65 |
-
|
| 66 |
# Update the session state with the selected file details
|
| 67 |
st.session_state.selected_file = row['filename']
|
| 68 |
st.session_state.selected_text = row['text']
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
for page_num in range(start_page, end_page + 1):
|
| 85 |
-
page = reader.pages[page_num]
|
| 86 |
-
text += page.extract_text()
|
| 87 |
-
|
| 88 |
-
return text
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
pdf_path = 'VCS-Methodology-Requirements.pdf'
|
| 98 |
-
start_page = 0
|
| 99 |
-
end_page = 89
|
| 100 |
-
methodology_text = extract_text_from_pdf(pdf_path, start_page, end_page)
|
| 101 |
-
|
| 102 |
-
pdf_path = 'VCS-Project-Description-Template-v4.4-FINAL2.docx.pdf'
|
| 103 |
-
start_page = 0
|
| 104 |
-
end_page = 34
|
| 105 |
-
template_text = extract_text_from_pdf(pdf_path, start_page, end_page)
|
| 106 |
-
|
| 107 |
-
# Configure the AI model
|
| 108 |
-
GOOGLE_API_KEY = "AIzaSyC7TpzrIH_3-dppWE8exqdZX3DAdE6cy8w"
|
| 109 |
-
genai.configure(api_key=GOOGLE_API_KEY)
|
| 110 |
-
model = genai.GenerativeModel('gemini-1.5-flash-latest')
|
| 111 |
-
|
| 112 |
-
# Generate the AI response based on the selected file's text
|
| 113 |
-
response = model.generate_content(
|
| 114 |
-
"You are a project verifier officer at Verra, the leading registry for projects used to generate carbon credits. Your job is to look into project submissions from project developers who create and implement nature-based solutions in order to generate carbon credits. You go through the content of the project submissions to investigate whether the submission fits into the vcs standards, methodology requirements, and touches everything on the project description template. A verifier has to compare the submission to these 3 main criteria. As a verifier, I want you to evaluate the project submission below based on the resources listed below. The output should be in the format of summary of the project submission, the level of adherence to the standards, what needs to be fixed, and notes for improvement for project developers. The output needs to have project-specific feedback. You can bolster your feedback with quotes from the submission or referencing numbers mentioned in the submission. Here is the project submission:" + st.session_state.selected_text + "Here is the vcs standards:" + vcs_text + "Here is the methodology requirement:" + methodology_text + "Here is the project description template:" + template_text)
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Create a Firestore client
|
| 15 |
db = firestore.client()
|
| 16 |
+
|
| 17 |
# Fetch and display submissions
|
| 18 |
def fetch_data(selected_fields):
|
| 19 |
docs = db.collection('pdf_uploads').stream()
|
|
|
|
| 25 |
return data
|
| 26 |
|
| 27 |
fields_to_fetch = ['filename', 'pdf_url', 'text']
|
| 28 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# Check if a file has already been selected and displayed
|
| 30 |
+
if 'selected_file' in st.session_state:
|
| 31 |
# When a file is selected, clear everything and display the details only
|
| 32 |
st.empty() # Clear all content before displaying details
|
| 33 |
st.title(f"Selected File: {st.session_state.selected_file}")
|
|
|
|
| 41 |
|
| 42 |
# Display the DataFrame
|
| 43 |
st.dataframe(df)
|
| 44 |
+
|
| 45 |
# Add buttons for each row in the DataFrame
|
| 46 |
for index, row in df.iterrows():
|
| 47 |
col1, col2 = st.columns([4, 1])
|
|
|
|
| 52 |
# Button to view details of each row
|
| 53 |
button_key = f"view_{index}"
|
| 54 |
if st.button("See details", key=button_key):
|
|
|
|
|
|
|
|
|
|
| 55 |
# Update the session state with the selected file details
|
| 56 |
st.session_state.selected_file = row['filename']
|
| 57 |
st.session_state.selected_text = row['text']
|
| 58 |
|
| 59 |
+
# Rerun the script so the page is redrawn from the updated session state
|
| 60 |
+
st.experimental_rerun()
|
| 61 |
+
|
| 62 |
+
if 'selected_file' in st.session_state:
|
| 63 |
+
# Process the selected file and generate AI response
|
| 64 |
+
pdf_path = 'VCS-Standard.pdf'
|
| 65 |
+
start_page = 0 # Start extracting from the first page (0-based index)
|
| 66 |
+
end_page = 93 # Last page to extract (0-based index, inclusive)
|
| 67 |
+
vcs_text = extract_text_from_pdf(pdf_path, start_page, end_page)
|
| 68 |
+
|
| 69 |
+
pdf_path = 'VCS-Methodology-Requirements.pdf'
|
| 70 |
+
start_page = 0 # Start extracting from the first page (0-based index)
|
| 71 |
+
end_page = 89 # Last page to extract (0-based index, inclusive)
|
| 72 |
+
methodology_text = extract_text_from_pdf(pdf_path, start_page, end_page)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
+
pdf_path = 'VCS-Project-Description-Template-v4.4-FINAL2.docx.pdf'
|
| 75 |
+
start_page = 0 # Start extracting from the first page (0-based index)
|
| 76 |
+
end_page = 34 # Last page to extract (0-based index, inclusive)
|
| 77 |
+
template_text = extract_text_from_pdf(pdf_path, start_page, end_page)
|
| 78 |
+
|
| 79 |
+
# Configure the AI client. NOTE(review): the API key below is hard-coded; load it from configuration (e.g. an environment variable) instead of committing it to source.
|
| 80 |
+
GOOGLE_API_KEY = "AIzaSyC7TpzrIH_3-dppWE8exqdZX3DAdE6cy8w"
|
| 81 |
+
genai.configure(api_key=GOOGLE_API_KEY)
|
| 82 |
|
| 83 |
+
# Example of working with LLM models (Gemini 1.5)
|
| 84 |
+
model = genai.GenerativeModel('gemini-1.5-flash-latest')
|
| 85 |
+
|
| 86 |
+
# Generate the AI response based on the text of the selected file
|
| 87 |
+
response = model.generate_content("You are a project verifier officer at Verra, the leading registry for projects used to generate carbon credits. Your job is to look into project submissions from project developers who create an implement nature-based solutions in order to generate carbon credits. You go through the content of the project submissions to investigate whether the submission fits into the vcs standards, methodology requirements, and touches everything on the project description template. A verifier has to compare the submission to these 3 main criteria. As a verifier, I want you to evaluate the project submission below based on the resources listed below. The output should be in the format of summary of the project submission, the level of adherence to the standards, what needs to be fixed, and notes for improvement for project developers. The output needs to have project-specific feedback. You can bolster your feedback with quotes from the submission or referencing numbers mentioned in the submission. Here is the project submission:" + st.session_state.selected_text + "Here is the vcs standards:" + vcs_text + "Here is the methodology requirement:" + methodology_text + "Here is the project description template:" + template_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
+
# Save the response and filename in session state
|
| 90 |
+
st.session_state.selected_ai = response.text
|
| 91 |
+
|
| 92 |
+
# Clear everything and show file details and AI response
|
| 93 |
+
st.empty()
|
| 94 |
+
st.title(f"Selected File: {st.session_state.selected_file}")
|
| 95 |
+
st.write(f"AI Response: {st.session_state.selected_ai}")
|
| 96 |
+
|
| 97 |
+
# Helper function to extract text from PDF
|
| 98 |
+
def extract_text_from_pdf(uploaded_file, start_page, end_page):
    """Return the concatenated text of an inclusive page range of a PDF.

    Args:
        uploaded_file: Value passed directly to ``PyPDF2.PdfReader`` (callers
            in this file pass a path string). ``None`` means no file is
            available.
        start_page: 0-based index of the first page to read. A value outside
            the document falls back to 0.
        end_page: 0-based index of the last page to read (inclusive). A value
            before ``start_page`` or past the last page falls back to the
            final page of the document.

    Returns:
        The text of the selected pages joined in page order, or an empty
        string when ``uploaded_file`` is ``None``.
    """
    if uploaded_file is None:
        return ""

    reader = PyPDF2.PdfReader(uploaded_file)
    num_pages = len(reader.pages)

    # Clamp out-of-range bounds instead of raising, so callers may pass
    # approximate page counts.
    if not 0 <= start_page < num_pages:
        start_page = 0
    if not start_page <= end_page < num_pages:
        end_page = num_pages - 1

    # Join once rather than growing a string with ``+=``; ``or ""`` keeps a
    # page that yields no text from breaking the concatenation.
    return "".join(
        reader.pages[page_num].extract_text() or ""
        for page_num in range(start_page, end_page + 1)
    )
|