Spaces:

VelaTest
/

Sustainability_Report_Extractor

Sleeping

App Files Files Community

Vela committed on Jun 9, 2025

Commit

2692728

1 Parent(s): 5d4ad83

removed extraction tool

Browse files

Files changed (5) hide show

.gitignore +2 -1
app.py +86 -119
pages/database.py +0 -92
src/utils/__pycache__/common_functions.cpython-313.pyc +0 -0
src/utils/__pycache__/streamlit_function.cpython-313.pyc +0 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
 .venv
 logs
-.env

 .venv
 logs
+.env
+src/utils/__pycache__/

app.py CHANGED Viewed

@@ -1,125 +1,92 @@
 import streamlit as st
 import pandas as pd
-import os
 from src.utils import streamlit_function
-from src.utils import logger
-logger = logger.get_logger()
 streamlit_function.config_homepage()
-st.title("Sustainability Report Analyzer")
-st.write("Upload your sustainability report PDF and generate insights using Gemini models.")
-uploaded_files = streamlit_function.upload_file("pdf", label="📤 Upload Sustainability Report PDF")
-if uploaded_files:
-    st.session_state.uploaded_files = uploaded_files
-if "uploaded_files" not in st.session_state:
-    st.session_state.uploaded_files = []
-if st.session_state.uploaded_files:
-    columns = st.columns(1)
-# # import streamlit as st
-# # from application.schemas.response_schema import GEMINI_GHG_PARAMETERS, GEMINI_ENVIRONMENTAL_PARAMETERS_CSRD,GEMINI_ENVIRONMENT_PARAMETERS,GEMINI_SOCIAL_PARAMETERS, GEMINI_GOVERNANCE_PARAMETERS, GEMINI_MATERIALITY_PARAMETERS, GEMINI_NET_ZERO_INTERVENTION_PARAMETERS
-# # from application.services import streamlit_function, gemini_model
-# # from application.utils import logger
-# # import test
-# # logger = logger.get_logger()
-# # streamlit_function.config_homepage()
-# # st.title("Sustainability Report Analyzer")
-# # st.write("Upload your sustainability report PDF and generate insights using different models.")
-# # MODEL = ["gemini-1.5-pro-latest", "gemini-2.0-flash", "gemini-1.5-flash", "gemini-2.5-pro-exp-03-25"]
-# # MODEL_1 = "gemini-1.5-pro-latest"
-# # MODEL_2 = "gemini-2.0-flash"
-# # MODEL_3 = "gemini-1.5-flash"
-# # API_1 = "gemini"
-# # API_2 = "gemini"
-# # API_3 = "gemini"
-# # response_schema = [ GEMINI_GHG_PARAMETERS, GEMINI_ENVIRONMENTAL_PARAMETERS_CSRD,
-# #                     GEMINI_ENVIRONMENT_PARAMETERS,GEMINI_SOCIAL_PARAMETERS,
-# #                     GEMINI_GOVERNANCE_PARAMETERS, GEMINI_MATERIALITY_PARAMETERS,
-# #                     GEMINI_NET_ZERO_INTERVENTION_PARAMETERS]
-# # if "uploaded_files" not in st.session_state:
-# #     st.session_state.uploaded_files = []
-# # MODEL = st.selectbox(
-# #     "Select Model",
-# #     options=MODEL,
-# #     index=0,
-# # )
-# # uploaded_files = streamlit_function.upload_file("pdf", label="Upload Sustainability Report PDF")
-# # if uploaded_files:
-# #     st.session_state.uploaded_files = uploaded_files
-# # if st.session_state.uploaded_files:
-# #     columns = st.columns([5, 5, 5], gap="small")
-# #     for i, col in enumerate(columns):
-# #         if i < len(st.session_state.uploaded_files):
-# #             pdf_file = st.session_state.uploaded_files[i]
-# #             file_name = pdf_file.name.removesuffix(".pdf")
-# #             result_key = f"{MODEL}_result_file_{i+1}"
-# #             with col:
-# #                 st.write(f"**File {i+1}:** `{pdf_file.name}`")
-# #                 if st.button(f"Extract Data from File {i+1}", key=f"extract_btn_{i}"):
-# #                     with st.spinner(f"Extracting data from File {i+1} using {MODEL}..."):
-# #                         for schema in response_schema:
-# #                             result = gemini_model.extract_emissions_data_as_json(API_1, MODEL, pdf_file, schema)
-# #                             if schema == GEMINI_GHG_PARAMETERS:
-# #                                 column = "Greenhouse Gas (GHG) Protocol Parameters"
-# #                             elif schema == GEMINI_ENVIRONMENTAL_PARAMETERS_CSRD:
-# #                                 column = "Environmental Parameters (CSRD)"
-# #                             elif schema == GEMINI_ENVIRONMENT_PARAMETERS:
-# #                                 column = "Environmental Parameters"
-# #                             elif schema == GEMINI_SOCIAL_PARAMETERS:
-# #                                 column = "Social Parameters"
-# #                             elif schema == GEMINI_GOVERNANCE_PARAMETERS:
-# #                                 column = "Governance Parameters"
-# #                             elif schema == GEMINI_MATERIALITY_PARAMETERS:
-# #                                 column = "Materiality Parameters"
-# #                             elif schema == GEMINI_NET_ZERO_INTERVENTION_PARAMETERS:
-# #                                 column = "Net Zero Intervention Parameters"
-# #                             else:
-# #                                 column = None
-# #                             test.export_results_to_excel(result, sheet_name=MODEL, filename=file_name, column=column )
-# #                             st.session_state[result_key] = result
-# #                 if st.session_state.get(result_key):
-# #                     st.write(f"**Extracted Metrics for File {i+1}:**")
-# #                     st.json(st.session_state[result_key])

 import streamlit as st
 import pandas as pd
 from src.utils import streamlit_function
+from src.utils.logger import get_logger
+from src.services.mongo_db_service import retrieve_documents
+from src.utils.common_functions import prepare_comparison_df
+logger = get_logger()
 streamlit_function.config_homepage()
+st.title("📊 ESG Report Comparison Dashboard")
+METRIC_OPTIONS = {
+    "Report Metadata": ["report_metadata"],
+    "Environmental Parameters": [
+        "Emissions", "Energy Consumption", "Water Withdrawal", "Water Discharge",
+        "Waste Generation", "Waste Disposal", "Waste Recovery"
+    ],
+    "Social Parameters": [
+        "Human Rights Training Coverage", "LTIFR", "Other Safety Incidents",
+        "Health & Safety Training Coverage", "Grievances Reported",
+        "Third-party Assessment Coverage", "CSR Beneficiaries", "Female Wage Share",
+        "Wages by Location", "Well-being Cost", "Worker Well-being Coverage",
+        "Employee Well-being Coverage", "Turnover Count", "Workforce Gender Diversity"
+    ],
+    "Governance Parameters": [
+        "Non-compliance Instances", "Disciplinary Actions", "Consumer Complaints",
+        "Customer Data Breaches", "Governance Diversity", "Purchase Concentration",
+        "Sales Concentration", "Related Party Transactions"
+    ],
+    "Materiality": ["material_topics"]
+}
+ESG_EXTRACTOR_COLLECTION = "esg_report_extracts"
+company_docs = retrieve_documents(collection_name=ESG_EXTRACTOR_COLLECTION)
+available_company_data = [doc["_id"] for doc in company_docs if "_id" in doc]
+selected_companies = st.multiselect(
+    "Select up to 3 companies",
+    options=available_company_data,
+    max_selections=3
+)
+def get_all_years(docs) -> list:
+    years = set()
+    for doc in docs:
+        if "esg_reports" in doc and isinstance(doc["esg_reports"], dict):
+            years.update(doc["esg_reports"].keys())
+    return sorted(years, reverse=True)
+def highlight_missing_values(df):
+    return df.style.map(lambda v: "background-color: #ffe6e6" if pd.isna(v) or str(v).strip() in ["", "nan", "None", "Not Available","N/A"] else "background-color: #e6ffe6")
+def extract_company_name_from_doc(doc, default_name):
+    return doc.get("report_metadata", {}).get("company_legal_name", default_name)
+if selected_companies:
+    all_years = get_all_years(company_docs)
+    selected_year = st.selectbox(
+        "Select a report year (applies to all selected companies)",
+        options=["-- Select Year --"] + all_years,
+        key="common_year"
+    )
+    if selected_year != "-- Select Year --":
+        tabs = st.tabs(list(METRIC_OPTIONS.keys()))
+        metric_categories = list(METRIC_OPTIONS.keys())
+        for i, tab in enumerate(tabs):
+            with tab:
+                st.subheader(metric_categories[i])
+                metric_keys = METRIC_OPTIONS[metric_categories[i]]
+                for metric in metric_keys:
+                    st.markdown(f"### {metric}")
+                    comparison_df = prepare_comparison_df(
+                        selected_companies,
+                        selected_year,
+                        metric,
+                        company_docs
+                    )
+                    if comparison_df is not None:
+                        st.dataframe(highlight_missing_values(comparison_df), use_container_width=True)
+                    else:
+                        st.warning(f"No data found for **{metric}** in {selected_year}")
+    else:
+        st.info("Please select a year to view report comparisons.")
+else:
+    st.info("Please select at least one company to continue.")

pages/database.py DELETED Viewed

@@ -1,92 +0,0 @@
-import streamlit as st
-import pandas as pd
-from src.utils import streamlit_function
-from src.utils.logger import get_logger
-from src.services.mongo_db_service import retrieve_documents
-from src.utils.common_functions import prepare_comparison_df
-logger = get_logger()
-streamlit_function.config_homepage()
-st.title("📊 ESG Report Comparison Dashboard")
-METRIC_OPTIONS = {
-    "Report Metadata": ["report_metadata"],
-    "Environmental Parameters": [
-        "Emissions", "Energy Consumption", "Water Withdrawal", "Water Discharge",
-        "Waste Generation", "Waste Disposal", "Waste Recovery"
-    ],
-    "Social Parameters": [
-        "Human Rights Training Coverage", "LTIFR", "Other Safety Incidents",
-        "Health & Safety Training Coverage", "Grievances Reported",
-        "Third-party Assessment Coverage", "CSR Beneficiaries", "Female Wage Share",
-        "Wages by Location", "Well-being Cost", "Worker Well-being Coverage",
-        "Employee Well-being Coverage", "Turnover Count", "Workforce Gender Diversity"
-    ],
-    "Governance Parameters": [
-        "Non-compliance Instances", "Disciplinary Actions", "Consumer Complaints",
-        "Customer Data Breaches", "Governance Diversity", "Purchase Concentration",
-        "Sales Concentration", "Related Party Transactions"
-    ],
-    "Materiality": ["material_topics"]
-}
-ESG_EXTRACTOR_COLLECTION = "esg_report_extracts"
-company_docs = retrieve_documents(collection_name=ESG_EXTRACTOR_COLLECTION)
-available_company_data = [doc["_id"] for doc in company_docs if "_id" in doc]
-selected_companies = st.multiselect(
-    "Select up to 3 companies",
-    options=available_company_data,
-    max_selections=3
-)
-def get_all_years(docs) -> list:
-    years = set()
-    for doc in docs:
-        if "esg_reports" in doc and isinstance(doc["esg_reports"], dict):
-            years.update(doc["esg_reports"].keys())
-    return sorted(years, reverse=True)
-def highlight_missing_values(df):
-    return df.style.map(lambda v: "background-color: #ffe6e6" if pd.isna(v) or str(v).strip() in ["", "nan", "None", "Not Available","N/A"] else "background-color: #e6ffe6")
-def extract_company_name_from_doc(doc, default_name):
-    return doc.get("report_metadata", {}).get("company_legal_name", default_name)
-if selected_companies:
-    all_years = get_all_years(company_docs)
-    selected_year = st.selectbox(
-        "Select a report year (applies to all selected companies)",
-        options=["-- Select Year --"] + all_years,
-        key="common_year"
-    )
-    if selected_year != "-- Select Year --":
-        tabs = st.tabs(list(METRIC_OPTIONS.keys()))
-        metric_categories = list(METRIC_OPTIONS.keys())
-        for i, tab in enumerate(tabs):
-            with tab:
-                st.subheader(metric_categories[i])
-                metric_keys = METRIC_OPTIONS[metric_categories[i]]
-                for metric in metric_keys:
-                    st.markdown(f"### {metric}")
-                    comparison_df = prepare_comparison_df(
-                        selected_companies,
-                        selected_year,
-                        metric,
-                        company_docs
-                    )
-                    if comparison_df is not None:
-                        st.dataframe(highlight_missing_values(comparison_df), use_container_width=True)
-                    else:
-                        st.warning(f"No data found for **{metric}** in {selected_year}")
-    else:
-        st.info("Please select a year to view report comparisons.")
-else:
-    st.info("Please select at least one company to continue.")

src/utils/__pycache__/common_functions.cpython-313.pyc CHANGED Viewed

Binary files a/src/utils/__pycache__/common_functions.cpython-313.pyc and b/src/utils/__pycache__/common_functions.cpython-313.pyc differ

src/utils/__pycache__/streamlit_function.cpython-313.pyc CHANGED Viewed

Binary files a/src/utils/__pycache__/streamlit_function.cpython-313.pyc and b/src/utils/__pycache__/streamlit_function.cpython-313.pyc differ