Vela committed on
Commit ·
2692728
1
Parent(s): 5d4ad83
removed extraction tool
Browse files
- .gitignore +2 -1
- app.py +86 -119
- pages/database.py +0 -92
- src/utils/__pycache__/common_functions.cpython-313.pyc +0 -0
- src/utils/__pycache__/streamlit_function.cpython-313.pyc +0 -0
.gitignore
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
.venv
|
| 2 |
logs
|
| 3 |
-
.env
|
|
|
|
|
|
| 1 |
.venv
|
| 2 |
logs
|
| 3 |
+
.env
|
| 4 |
+
src/utils/__pycache__/
|
app.py
CHANGED
|
@@ -1,125 +1,92 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
-
|
| 4 |
from src.utils import streamlit_function
|
| 5 |
-
from src.utils import
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
logger =
|
| 8 |
streamlit_function.config_homepage()
|
| 9 |
|
| 10 |
-
st.title("
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
#
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
# # for i, col in enumerate(columns):
|
| 92 |
-
# # if i < len(st.session_state.uploaded_files):
|
| 93 |
-
# # pdf_file = st.session_state.uploaded_files[i]
|
| 94 |
-
# # file_name = pdf_file.name.removesuffix(".pdf")
|
| 95 |
-
# # result_key = f"{MODEL}_result_file_{i+1}"
|
| 96 |
-
|
| 97 |
-
# # with col:
|
| 98 |
-
# # st.write(f"**File {i+1}:** `{pdf_file.name}`")
|
| 99 |
-
# # if st.button(f"Extract Data from File {i+1}", key=f"extract_btn_{i}"):
|
| 100 |
-
# # with st.spinner(f"Extracting data from File {i+1} using {MODEL}..."):
|
| 101 |
-
# # for schema in response_schema:
|
| 102 |
-
# # result = gemini_model.extract_emissions_data_as_json(API_1, MODEL, pdf_file, schema)
|
| 103 |
-
# # if schema == GEMINI_GHG_PARAMETERS:
|
| 104 |
-
# # column = "Greenhouse Gas (GHG) Protocol Parameters"
|
| 105 |
-
# # elif schema == GEMINI_ENVIRONMENTAL_PARAMETERS_CSRD:
|
| 106 |
-
# # column = "Environmental Parameters (CSRD)"
|
| 107 |
-
# # elif schema == GEMINI_ENVIRONMENT_PARAMETERS:
|
| 108 |
-
# # column = "Environmental Parameters"
|
| 109 |
-
# # elif schema == GEMINI_SOCIAL_PARAMETERS:
|
| 110 |
-
# # column = "Social Parameters"
|
| 111 |
-
# # elif schema == GEMINI_GOVERNANCE_PARAMETERS:
|
| 112 |
-
# # column = "Governance Parameters"
|
| 113 |
-
# # elif schema == GEMINI_MATERIALITY_PARAMETERS:
|
| 114 |
-
# # column = "Materiality Parameters"
|
| 115 |
-
# # elif schema == GEMINI_NET_ZERO_INTERVENTION_PARAMETERS:
|
| 116 |
-
# # column = "Net Zero Intervention Parameters"
|
| 117 |
-
# # else:
|
| 118 |
-
# # column = None
|
| 119 |
-
|
| 120 |
-
# # test.export_results_to_excel(result, sheet_name=MODEL, filename=file_name, column=column )
|
| 121 |
-
# # st.session_state[result_key] = result
|
| 122 |
-
|
| 123 |
-
# # if st.session_state.get(result_key):
|
| 124 |
-
# # st.write(f"**Extracted Metrics for File {i+1}:**")
|
| 125 |
-
# # st.json(st.session_state[result_key])
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
+
|
| 4 |
from src.utils import streamlit_function
|
| 5 |
+
from src.utils.logger import get_logger
|
| 6 |
+
from src.services.mongo_db_service import retrieve_documents
|
| 7 |
+
from src.utils.common_functions import prepare_comparison_df
|
| 8 |
|
| 9 |
+
logger = get_logger()
|
| 10 |
streamlit_function.config_homepage()
|
| 11 |
|
| 12 |
+
st.title("📊 ESG Report Comparison Dashboard")
|
| 13 |
+
|
| 14 |
+
METRIC_OPTIONS = {
|
| 15 |
+
"Report Metadata": ["report_metadata"],
|
| 16 |
+
"Environmental Parameters": [
|
| 17 |
+
"Emissions", "Energy Consumption", "Water Withdrawal", "Water Discharge",
|
| 18 |
+
"Waste Generation", "Waste Disposal", "Waste Recovery"
|
| 19 |
+
],
|
| 20 |
+
"Social Parameters": [
|
| 21 |
+
"Human Rights Training Coverage", "LTIFR", "Other Safety Incidents",
|
| 22 |
+
"Health & Safety Training Coverage", "Grievances Reported",
|
| 23 |
+
"Third-party Assessment Coverage", "CSR Beneficiaries", "Female Wage Share",
|
| 24 |
+
"Wages by Location", "Well-being Cost", "Worker Well-being Coverage",
|
| 25 |
+
"Employee Well-being Coverage", "Turnover Count", "Workforce Gender Diversity"
|
| 26 |
+
],
|
| 27 |
+
"Governance Parameters": [
|
| 28 |
+
"Non-compliance Instances", "Disciplinary Actions", "Consumer Complaints",
|
| 29 |
+
"Customer Data Breaches", "Governance Diversity", "Purchase Concentration",
|
| 30 |
+
"Sales Concentration", "Related Party Transactions"
|
| 31 |
+
],
|
| 32 |
+
"Materiality": ["material_topics"]
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
ESG_EXTRACTOR_COLLECTION = "esg_report_extracts"
|
| 36 |
+
|
| 37 |
+
company_docs = retrieve_documents(collection_name=ESG_EXTRACTOR_COLLECTION)
|
| 38 |
+
available_company_data = [doc["_id"] for doc in company_docs if "_id" in doc]
|
| 39 |
+
|
| 40 |
+
selected_companies = st.multiselect(
|
| 41 |
+
"Select up to 3 companies",
|
| 42 |
+
options=available_company_data,
|
| 43 |
+
max_selections=3
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
def get_all_years(docs) -> list:
|
| 47 |
+
years = set()
|
| 48 |
+
for doc in docs:
|
| 49 |
+
if "esg_reports" in doc and isinstance(doc["esg_reports"], dict):
|
| 50 |
+
years.update(doc["esg_reports"].keys())
|
| 51 |
+
return sorted(years, reverse=True)
|
| 52 |
+
|
| 53 |
+
def highlight_missing_values(df):
|
| 54 |
+
return df.style.map(lambda v: "background-color: #ffe6e6" if pd.isna(v) or str(v).strip() in ["", "nan", "None", "Not Available","N/A"] else "background-color: #e6ffe6")
|
| 55 |
+
|
| 56 |
+
def extract_company_name_from_doc(doc, default_name):
|
| 57 |
+
return doc.get("report_metadata", {}).get("company_legal_name", default_name)
|
| 58 |
+
|
| 59 |
+
if selected_companies:
|
| 60 |
+
all_years = get_all_years(company_docs)
|
| 61 |
+
|
| 62 |
+
selected_year = st.selectbox(
|
| 63 |
+
"Select a report year (applies to all selected companies)",
|
| 64 |
+
options=["-- Select Year --"] + all_years,
|
| 65 |
+
key="common_year"
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
if selected_year != "-- Select Year --":
|
| 69 |
+
tabs = st.tabs(list(METRIC_OPTIONS.keys()))
|
| 70 |
+
metric_categories = list(METRIC_OPTIONS.keys())
|
| 71 |
+
for i, tab in enumerate(tabs):
|
| 72 |
+
with tab:
|
| 73 |
+
st.subheader(metric_categories[i])
|
| 74 |
+
metric_keys = METRIC_OPTIONS[metric_categories[i]]
|
| 75 |
+
for metric in metric_keys:
|
| 76 |
+
st.markdown(f"### {metric}")
|
| 77 |
+
|
| 78 |
+
comparison_df = prepare_comparison_df(
|
| 79 |
+
selected_companies,
|
| 80 |
+
selected_year,
|
| 81 |
+
metric,
|
| 82 |
+
company_docs
|
| 83 |
+
)
|
| 84 |
+
|
| 85 |
+
if comparison_df is not None:
|
| 86 |
+
st.dataframe(highlight_missing_values(comparison_df), use_container_width=True)
|
| 87 |
+
else:
|
| 88 |
+
st.warning(f"No data found for **{metric}** in {selected_year}")
|
| 89 |
+
else:
|
| 90 |
+
st.info("Please select a year to view report comparisons.")
|
| 91 |
+
else:
|
| 92 |
+
st.info("Please select at least one company to continue.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/database.py
DELETED
|
@@ -1,92 +0,0 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
import pandas as pd
|
| 3 |
-
|
| 4 |
-
from src.utils import streamlit_function
|
| 5 |
-
from src.utils.logger import get_logger
|
| 6 |
-
from src.services.mongo_db_service import retrieve_documents
|
| 7 |
-
from src.utils.common_functions import prepare_comparison_df
|
| 8 |
-
|
| 9 |
-
logger = get_logger()
|
| 10 |
-
streamlit_function.config_homepage()
|
| 11 |
-
|
| 12 |
-
st.title("📊 ESG Report Comparison Dashboard")
|
| 13 |
-
|
| 14 |
-
METRIC_OPTIONS = {
|
| 15 |
-
"Report Metadata": ["report_metadata"],
|
| 16 |
-
"Environmental Parameters": [
|
| 17 |
-
"Emissions", "Energy Consumption", "Water Withdrawal", "Water Discharge",
|
| 18 |
-
"Waste Generation", "Waste Disposal", "Waste Recovery"
|
| 19 |
-
],
|
| 20 |
-
"Social Parameters": [
|
| 21 |
-
"Human Rights Training Coverage", "LTIFR", "Other Safety Incidents",
|
| 22 |
-
"Health & Safety Training Coverage", "Grievances Reported",
|
| 23 |
-
"Third-party Assessment Coverage", "CSR Beneficiaries", "Female Wage Share",
|
| 24 |
-
"Wages by Location", "Well-being Cost", "Worker Well-being Coverage",
|
| 25 |
-
"Employee Well-being Coverage", "Turnover Count", "Workforce Gender Diversity"
|
| 26 |
-
],
|
| 27 |
-
"Governance Parameters": [
|
| 28 |
-
"Non-compliance Instances", "Disciplinary Actions", "Consumer Complaints",
|
| 29 |
-
"Customer Data Breaches", "Governance Diversity", "Purchase Concentration",
|
| 30 |
-
"Sales Concentration", "Related Party Transactions"
|
| 31 |
-
],
|
| 32 |
-
"Materiality": ["material_topics"]
|
| 33 |
-
}
|
| 34 |
-
|
| 35 |
-
ESG_EXTRACTOR_COLLECTION = "esg_report_extracts"
|
| 36 |
-
|
| 37 |
-
company_docs = retrieve_documents(collection_name=ESG_EXTRACTOR_COLLECTION)
|
| 38 |
-
available_company_data = [doc["_id"] for doc in company_docs if "_id" in doc]
|
| 39 |
-
|
| 40 |
-
selected_companies = st.multiselect(
|
| 41 |
-
"Select up to 3 companies",
|
| 42 |
-
options=available_company_data,
|
| 43 |
-
max_selections=3
|
| 44 |
-
)
|
| 45 |
-
|
| 46 |
-
def get_all_years(docs) -> list:
|
| 47 |
-
years = set()
|
| 48 |
-
for doc in docs:
|
| 49 |
-
if "esg_reports" in doc and isinstance(doc["esg_reports"], dict):
|
| 50 |
-
years.update(doc["esg_reports"].keys())
|
| 51 |
-
return sorted(years, reverse=True)
|
| 52 |
-
|
| 53 |
-
def highlight_missing_values(df):
|
| 54 |
-
return df.style.map(lambda v: "background-color: #ffe6e6" if pd.isna(v) or str(v).strip() in ["", "nan", "None", "Not Available","N/A"] else "background-color: #e6ffe6")
|
| 55 |
-
|
| 56 |
-
def extract_company_name_from_doc(doc, default_name):
|
| 57 |
-
return doc.get("report_metadata", {}).get("company_legal_name", default_name)
|
| 58 |
-
|
| 59 |
-
if selected_companies:
|
| 60 |
-
all_years = get_all_years(company_docs)
|
| 61 |
-
|
| 62 |
-
selected_year = st.selectbox(
|
| 63 |
-
"Select a report year (applies to all selected companies)",
|
| 64 |
-
options=["-- Select Year --"] + all_years,
|
| 65 |
-
key="common_year"
|
| 66 |
-
)
|
| 67 |
-
|
| 68 |
-
if selected_year != "-- Select Year --":
|
| 69 |
-
tabs = st.tabs(list(METRIC_OPTIONS.keys()))
|
| 70 |
-
metric_categories = list(METRIC_OPTIONS.keys())
|
| 71 |
-
for i, tab in enumerate(tabs):
|
| 72 |
-
with tab:
|
| 73 |
-
st.subheader(metric_categories[i])
|
| 74 |
-
metric_keys = METRIC_OPTIONS[metric_categories[i]]
|
| 75 |
-
for metric in metric_keys:
|
| 76 |
-
st.markdown(f"### {metric}")
|
| 77 |
-
|
| 78 |
-
comparison_df = prepare_comparison_df(
|
| 79 |
-
selected_companies,
|
| 80 |
-
selected_year,
|
| 81 |
-
metric,
|
| 82 |
-
company_docs
|
| 83 |
-
)
|
| 84 |
-
|
| 85 |
-
if comparison_df is not None:
|
| 86 |
-
st.dataframe(highlight_missing_values(comparison_df), use_container_width=True)
|
| 87 |
-
else:
|
| 88 |
-
st.warning(f"No data found for **{metric}** in {selected_year}")
|
| 89 |
-
else:
|
| 90 |
-
st.info("Please select a year to view report comparisons.")
|
| 91 |
-
else:
|
| 92 |
-
st.info("Please select at least one company to continue.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/utils/__pycache__/common_functions.cpython-313.pyc
CHANGED
|
Binary files a/src/utils/__pycache__/common_functions.cpython-313.pyc and b/src/utils/__pycache__/common_functions.cpython-313.pyc differ
|
|
|
src/utils/__pycache__/streamlit_function.cpython-313.pyc
CHANGED
|
Binary files a/src/utils/__pycache__/streamlit_function.cpython-313.pyc and b/src/utils/__pycache__/streamlit_function.cpython-313.pyc differ
|
|
|