Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from typing import Union, List | |
| import pandas as pd | |
| from io import BytesIO | |
| import json | |
| import os | |
| from openpyxl import load_workbook | |
| from application.utils import logger | |
# Module-level logger obtained from the project's logging helper.
# NOTE: this rebinds the name `logger`, shadowing the imported `logger`
# module; after this line the module itself is no longer reachable by that name.
logger = logger.get_logger()

# Defaults used by `config_homepage` when calling `st.set_page_config`.
PAGE_TITLE = "PDF Extractor"
PAGE_LAYOUT = "wide"

# Page-config options that are currently disabled (page icon, menu links,
# and "About" text); nothing in this file reads them while commented out.
# PAGE_ICON = "src/frontend/images/page_icon.jpg"
# GITHUB_LINK = "https://github.com/Vela-Test1993/yuvabe-care-companion-ai"
# ABOUT_US = "An AI-powered assistant for personalized healthcare guidance."
def config_homepage(page_title=PAGE_TITLE):
    """
    Apply the Streamlit page configuration for the app.

    Calls `st.set_page_config()` with the given browser-tab title, the
    module-level `PAGE_LAYOUT`, and a collapsed sidebar. The call is skipped
    when the key "page_config_set" is already present in `st.session_state`.

    Note:
        This function never sets "page_config_set" itself (the assignment is
        commented out), so unless other code sets that key the configuration
        is applied on every call. The page icon and custom menu items are
        currently not passed to Streamlit.

    Args:
        page_title (str): The title displayed on the browser tab
            (default is PAGE_TITLE).

    Example:
        >>> config_homepage("My Custom App")
    """
    # Guard clause: skip configuration if the flag is already present.
    if "page_config_set" in st.session_state:
        return

    page_options = {
        "page_title": page_title,
        "layout": PAGE_LAYOUT,
        "initial_sidebar_state": "collapsed",
    }
    st.set_page_config(**page_options)
def upload_file(
    file_types: Union[str, List[str]] = "pdf",
    # U+1F4E4 (outbox tray emoji). Written as an escape so the label cannot be
    # garbled again by a wrong source-file encoding (it previously showed up
    # as the mojibake "π€").
    label: str = "\U0001F4E4 Upload a file",
    help_text: str = "Upload your file for processing.",
    allow_multiple: bool = True,
):
    """
    Render a Streamlit file uploader followed by a "Submit" button.

    When the "Submit" button is pressed, the current uploader value is also
    stored in `st.session_state.pdf_file`. The uploader value is returned on
    every call, whether or not the button was pressed.

    Args:
        file_types (str or list): Allowed file type(s), e.g., "pdf" or
            ["pdf", "docx"]. A single string is wrapped into a list.
        label (str): Label displayed above the uploader.
        help_text (str): Tooltip help text.
        allow_multiple (bool): Allow multiple file uploads.

    Returns:
        Whatever `st.file_uploader` returns: per the Streamlit documentation,
        a list of uploaded files when `allow_multiple` is True, otherwise a
        single uploaded file or None when nothing has been uploaded.
    """
    # st.file_uploader expects a list of extensions; normalise a lone string.
    if isinstance(file_types, str):
        file_types = [file_types]

    uploaded_files = st.file_uploader(
        label=label,
        type=file_types,
        help=help_text,
        accept_multiple_files=allow_multiple,
    )

    # Persist the selection only on an explicit submit so other parts of the
    # app can read it from the session state.
    if st.button("Submit"):
        st.session_state.pdf_file = uploaded_files

    return uploaded_files
def _next_free_column(workbook_path: str, sheet_name: str) -> int:
    """Return the column offset at which new data is written in *sheet_name*.

    This is the sheet's `max_column` as reported by openpyxl when the sheet
    exists in the workbook, otherwise 0.
    """
    book = load_workbook(workbook_path)
    try:
        if sheet_name in book.sheetnames:
            # max_column is a 1-based count, so used as a 0-based `startcol`
            # it points at the first column to the right of the existing data.
            return book[sheet_name].max_column
        return 0
    finally:
        # The workbook is only inspected here; release it before pandas
        # reopens the same file for writing.
        book.close()


def export_results_to_excel(
    results: dict,
    sheet_name: str,
    filename: str = "output.xlsx",
    column: str = None,
) -> Union[BytesIO, None]:
    """
    Export one entry of a results dictionary to Excel.

    The dictionary stored under `results[column]` is turned into a two-column
    table (`column`, "Value") and written twice:

    1. To `data/<filename>` on disk. If the file already exists, the table is
       overlaid onto `sheet_name` starting at row 0 and at the column offset
       given by the sheet's current `max_column` (offset 0 if the sheet does
       not exist yet). Otherwise a new file is created.
    2. To an in-memory workbook containing only the current table, which is
       returned for download.

    Args:
        results (dict): The data to export.
        sheet_name (str): The sheet name to write to.
        filename (str): The Excel file name (with or without '.xlsx').
        column (str): Key in `results` whose dictionary value is exported;
            also used as the header of the first column. If the key is
            missing, an empty table (headers only) is written.

    Returns:
        BytesIO: In-memory Excel file for Streamlit download, or None when
        `results` is empty, the selected value is not a dictionary, or any
        error occurs during the export (the error is logged).
    """
    try:
        if not results:
            logger.error("Results object is None or empty.")
            return None

        filename = filename if filename.endswith(".xlsx") else f"{filename}.xlsx"
        data = results.get(column, {})
        logger.info(f"Exporting data for column '{column}' to {filename}")

        if not isinstance(data, dict):
            logger.error(f"Expected dictionary for column '{column}', but got {type(data)}")
            return None

        df = pd.DataFrame(list(data.items()), columns=[column, "Value"])
        df.fillna(0, inplace=True)

        os.makedirs("data", exist_ok=True)
        physical_path = os.path.join("data", filename)

        if os.path.exists(physical_path):
            start_column = _next_free_column(physical_path, sheet_name)
            with pd.ExcelWriter(
                physical_path,
                engine="openpyxl",
                mode="a",
                if_sheet_exists="overlay",
            ) as writer:
                df.to_excel(
                    writer,
                    sheet_name=sheet_name,
                    index=False,
                    header=True,
                    startrow=0,
                    startcol=start_column,
                )
        else:
            with pd.ExcelWriter(physical_path, engine="openpyxl", mode="w") as writer:
                df.to_excel(writer, sheet_name=sheet_name, index=False, header=True, startrow=0)

        # The download stream holds only the table exported by this call,
        # not the accumulated contents of the file on disk.
        output_stream = BytesIO()
        with pd.ExcelWriter(output_stream, engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=False)
        output_stream.seek(0)

        logger.info(f"Data exported to {filename} successfully.")
        return output_stream
    except Exception as e:
        # Deliberate best-effort boundary: callers receive None on failure.
        logger.error(f"Error creating Excel export: {e}")
        return None