| from __future__ import annotations |
|
|
| import mmap |
| from typing import ( |
| TYPE_CHECKING, |
| Any, |
| cast, |
| ) |
|
|
| import numpy as np |
|
|
| from pandas.compat._optional import import_optional_dependency |
| from pandas.util._decorators import doc |
|
|
| from pandas.core.shared_docs import _shared_docs |
|
|
| from pandas.io.excel._base import ( |
| BaseExcelReader, |
| ExcelWriter, |
| ) |
| from pandas.io.excel._util import ( |
| combine_kwargs, |
| validate_freeze_panes, |
| ) |
|
|
| if TYPE_CHECKING: |
| from openpyxl import Workbook |
| from openpyxl.descriptors.serialisable import Serialisable |
|
|
| from pandas._typing import ( |
| ExcelWriterIfSheetExists, |
| FilePath, |
| ReadBuffer, |
| Scalar, |
| StorageOptions, |
| WriteExcelBuffer, |
| ) |
|
|
|
|
| class OpenpyxlWriter(ExcelWriter): |
| _engine = "openpyxl" |
| _supported_extensions = (".xlsx", ".xlsm") |
|
|
| def __init__( |
| self, |
| path: FilePath | WriteExcelBuffer | ExcelWriter, |
| engine: str | None = None, |
| date_format: str | None = None, |
| datetime_format: str | None = None, |
| mode: str = "w", |
| storage_options: StorageOptions | None = None, |
| if_sheet_exists: ExcelWriterIfSheetExists | None = None, |
| engine_kwargs: dict[str, Any] | None = None, |
| **kwargs, |
| ) -> None: |
| |
| from openpyxl.workbook import Workbook |
|
|
| engine_kwargs = combine_kwargs(engine_kwargs, kwargs) |
|
|
| super().__init__( |
| path, |
| mode=mode, |
| storage_options=storage_options, |
| if_sheet_exists=if_sheet_exists, |
| engine_kwargs=engine_kwargs, |
| ) |
|
|
| |
| |
| if "r+" in self._mode: |
| from openpyxl import load_workbook |
|
|
| try: |
| self._book = load_workbook(self._handles.handle, **engine_kwargs) |
| except TypeError: |
| self._handles.handle.close() |
| raise |
| self._handles.handle.seek(0) |
| else: |
| |
| try: |
| self._book = Workbook(**engine_kwargs) |
| except TypeError: |
| self._handles.handle.close() |
| raise |
|
|
| if self.book.worksheets: |
| self.book.remove(self.book.worksheets[0]) |
|
|
| @property |
| def book(self) -> Workbook: |
| """ |
| Book instance of class openpyxl.workbook.Workbook. |
| |
| This attribute can be used to access engine-specific features. |
| """ |
| return self._book |
|
|
| @property |
| def sheets(self) -> dict[str, Any]: |
| """Mapping of sheet names to sheet objects.""" |
| result = {name: self.book[name] for name in self.book.sheetnames} |
| return result |
|
|
| def _save(self) -> None: |
| """ |
| Save workbook to disk. |
| """ |
| self.book.save(self._handles.handle) |
| if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap): |
| |
| self._handles.handle.truncate() |
|
|
| @classmethod |
| def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: |
| """ |
| Convert a style_dict to a set of kwargs suitable for initializing |
| or updating-on-copy an openpyxl v2 style object. |
| |
| Parameters |
| ---------- |
| style_dict : dict |
| A dict with zero or more of the following keys (or their synonyms). |
| 'font' |
| 'fill' |
| 'border' ('borders') |
| 'alignment' |
| 'number_format' |
| 'protection' |
| |
| Returns |
| ------- |
| style_kwargs : dict |
| A dict with the same, normalized keys as ``style_dict`` but each |
| value has been replaced with a native openpyxl style object of the |
| appropriate class. |
| """ |
| _style_key_map = {"borders": "border"} |
|
|
| style_kwargs: dict[str, Serialisable] = {} |
| for k, v in style_dict.items(): |
| k = _style_key_map.get(k, k) |
| _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None) |
| new_v = _conv_to_x(v) |
| if new_v: |
| style_kwargs[k] = new_v |
|
|
| return style_kwargs |
|
|
| @classmethod |
| def _convert_to_color(cls, color_spec): |
| """ |
| Convert ``color_spec`` to an openpyxl v2 Color object. |
| |
| Parameters |
| ---------- |
| color_spec : str, dict |
| A 32-bit ARGB hex string, or a dict with zero or more of the |
| following keys. |
| 'rgb' |
| 'indexed' |
| 'auto' |
| 'theme' |
| 'tint' |
| 'index' |
| 'type' |
| |
| Returns |
| ------- |
| color : openpyxl.styles.Color |
| """ |
| from openpyxl.styles import Color |
|
|
| if isinstance(color_spec, str): |
| return Color(color_spec) |
| else: |
| return Color(**color_spec) |
|
|
| @classmethod |
| def _convert_to_font(cls, font_dict): |
| """ |
| Convert ``font_dict`` to an openpyxl v2 Font object. |
| |
| Parameters |
| ---------- |
| font_dict : dict |
| A dict with zero or more of the following keys (or their synonyms). |
| 'name' |
| 'size' ('sz') |
| 'bold' ('b') |
| 'italic' ('i') |
| 'underline' ('u') |
| 'strikethrough' ('strike') |
| 'color' |
| 'vertAlign' ('vertalign') |
| 'charset' |
| 'scheme' |
| 'family' |
| 'outline' |
| 'shadow' |
| 'condense' |
| |
| Returns |
| ------- |
| font : openpyxl.styles.Font |
| """ |
| from openpyxl.styles import Font |
|
|
| _font_key_map = { |
| "sz": "size", |
| "b": "bold", |
| "i": "italic", |
| "u": "underline", |
| "strike": "strikethrough", |
| "vertalign": "vertAlign", |
| } |
|
|
| font_kwargs = {} |
| for k, v in font_dict.items(): |
| k = _font_key_map.get(k, k) |
| if k == "color": |
| v = cls._convert_to_color(v) |
| font_kwargs[k] = v |
|
|
| return Font(**font_kwargs) |
|
|
| @classmethod |
| def _convert_to_stop(cls, stop_seq): |
| """ |
| Convert ``stop_seq`` to a list of openpyxl v2 Color objects, |
| suitable for initializing the ``GradientFill`` ``stop`` parameter. |
| |
| Parameters |
| ---------- |
| stop_seq : iterable |
| An iterable that yields objects suitable for consumption by |
| ``_convert_to_color``. |
| |
| Returns |
| ------- |
| stop : list of openpyxl.styles.Color |
| """ |
| return map(cls._convert_to_color, stop_seq) |
|
|
| @classmethod |
| def _convert_to_fill(cls, fill_dict: dict[str, Any]): |
| """ |
| Convert ``fill_dict`` to an openpyxl v2 Fill object. |
| |
| Parameters |
| ---------- |
| fill_dict : dict |
| A dict with one or more of the following keys (or their synonyms), |
| 'fill_type' ('patternType', 'patterntype') |
| 'start_color' ('fgColor', 'fgcolor') |
| 'end_color' ('bgColor', 'bgcolor') |
| or one or more of the following keys (or their synonyms). |
| 'type' ('fill_type') |
| 'degree' |
| 'left' |
| 'right' |
| 'top' |
| 'bottom' |
| 'stop' |
| |
| Returns |
| ------- |
| fill : openpyxl.styles.Fill |
| """ |
| from openpyxl.styles import ( |
| GradientFill, |
| PatternFill, |
| ) |
|
|
| _pattern_fill_key_map = { |
| "patternType": "fill_type", |
| "patterntype": "fill_type", |
| "fgColor": "start_color", |
| "fgcolor": "start_color", |
| "bgColor": "end_color", |
| "bgcolor": "end_color", |
| } |
|
|
| _gradient_fill_key_map = {"fill_type": "type"} |
|
|
| pfill_kwargs = {} |
| gfill_kwargs = {} |
| for k, v in fill_dict.items(): |
| pk = _pattern_fill_key_map.get(k) |
| gk = _gradient_fill_key_map.get(k) |
| if pk in ["start_color", "end_color"]: |
| v = cls._convert_to_color(v) |
| if gk == "stop": |
| v = cls._convert_to_stop(v) |
| if pk: |
| pfill_kwargs[pk] = v |
| elif gk: |
| gfill_kwargs[gk] = v |
| else: |
| pfill_kwargs[k] = v |
| gfill_kwargs[k] = v |
|
|
| try: |
| return PatternFill(**pfill_kwargs) |
| except TypeError: |
| return GradientFill(**gfill_kwargs) |
|
|
| @classmethod |
| def _convert_to_side(cls, side_spec): |
| """ |
| Convert ``side_spec`` to an openpyxl v2 Side object. |
| |
| Parameters |
| ---------- |
| side_spec : str, dict |
| A string specifying the border style, or a dict with zero or more |
| of the following keys (or their synonyms). |
| 'style' ('border_style') |
| 'color' |
| |
| Returns |
| ------- |
| side : openpyxl.styles.Side |
| """ |
| from openpyxl.styles import Side |
|
|
| _side_key_map = {"border_style": "style"} |
|
|
| if isinstance(side_spec, str): |
| return Side(style=side_spec) |
|
|
| side_kwargs = {} |
| for k, v in side_spec.items(): |
| k = _side_key_map.get(k, k) |
| if k == "color": |
| v = cls._convert_to_color(v) |
| side_kwargs[k] = v |
|
|
| return Side(**side_kwargs) |
|
|
| @classmethod |
| def _convert_to_border(cls, border_dict): |
| """ |
| Convert ``border_dict`` to an openpyxl v2 Border object. |
| |
| Parameters |
| ---------- |
| border_dict : dict |
| A dict with zero or more of the following keys (or their synonyms). |
| 'left' |
| 'right' |
| 'top' |
| 'bottom' |
| 'diagonal' |
| 'diagonal_direction' |
| 'vertical' |
| 'horizontal' |
| 'diagonalUp' ('diagonalup') |
| 'diagonalDown' ('diagonaldown') |
| 'outline' |
| |
| Returns |
| ------- |
| border : openpyxl.styles.Border |
| """ |
| from openpyxl.styles import Border |
|
|
| _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"} |
|
|
| border_kwargs = {} |
| for k, v in border_dict.items(): |
| k = _border_key_map.get(k, k) |
| if k == "color": |
| v = cls._convert_to_color(v) |
| if k in ["left", "right", "top", "bottom", "diagonal"]: |
| v = cls._convert_to_side(v) |
| border_kwargs[k] = v |
|
|
| return Border(**border_kwargs) |
|
|
| @classmethod |
| def _convert_to_alignment(cls, alignment_dict): |
| """ |
| Convert ``alignment_dict`` to an openpyxl v2 Alignment object. |
| |
| Parameters |
| ---------- |
| alignment_dict : dict |
| A dict with zero or more of the following keys (or their synonyms). |
| 'horizontal' |
| 'vertical' |
| 'text_rotation' |
| 'wrap_text' |
| 'shrink_to_fit' |
| 'indent' |
| Returns |
| ------- |
| alignment : openpyxl.styles.Alignment |
| """ |
| from openpyxl.styles import Alignment |
|
|
| return Alignment(**alignment_dict) |
|
|
| @classmethod |
| def _convert_to_number_format(cls, number_format_dict): |
| """ |
| Convert ``number_format_dict`` to an openpyxl v2.1.0 number format |
| initializer. |
| |
| Parameters |
| ---------- |
| number_format_dict : dict |
| A dict with zero or more of the following keys. |
| 'format_code' : str |
| |
| Returns |
| ------- |
| number_format : str |
| """ |
| return number_format_dict["format_code"] |
|
|
| @classmethod |
| def _convert_to_protection(cls, protection_dict): |
| """ |
| Convert ``protection_dict`` to an openpyxl v2 Protection object. |
| |
| Parameters |
| ---------- |
| protection_dict : dict |
| A dict with zero or more of the following keys. |
| 'locked' |
| 'hidden' |
| |
| Returns |
| ------- |
| """ |
| from openpyxl.styles import Protection |
|
|
| return Protection(**protection_dict) |
|
|
| def _write_cells( |
| self, |
| cells, |
| sheet_name: str | None = None, |
| startrow: int = 0, |
| startcol: int = 0, |
| freeze_panes: tuple[int, int] | None = None, |
| ) -> None: |
| |
| sheet_name = self._get_sheet_name(sheet_name) |
|
|
| _style_cache: dict[str, dict[str, Serialisable]] = {} |
|
|
| if sheet_name in self.sheets and self._if_sheet_exists != "new": |
| if "r+" in self._mode: |
| if self._if_sheet_exists == "replace": |
| old_wks = self.sheets[sheet_name] |
| target_index = self.book.index(old_wks) |
| del self.book[sheet_name] |
| wks = self.book.create_sheet(sheet_name, target_index) |
| elif self._if_sheet_exists == "error": |
| raise ValueError( |
| f"Sheet '{sheet_name}' already exists and " |
| f"if_sheet_exists is set to 'error'." |
| ) |
| elif self._if_sheet_exists == "overlay": |
| wks = self.sheets[sheet_name] |
| else: |
| raise ValueError( |
| f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. " |
| "Valid options are 'error', 'new', 'replace' and 'overlay'." |
| ) |
| else: |
| wks = self.sheets[sheet_name] |
| else: |
| wks = self.book.create_sheet() |
| wks.title = sheet_name |
|
|
| if validate_freeze_panes(freeze_panes): |
| freeze_panes = cast(tuple[int, int], freeze_panes) |
| wks.freeze_panes = wks.cell( |
| row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 |
| ) |
|
|
| for cell in cells: |
| xcell = wks.cell( |
| row=startrow + cell.row + 1, column=startcol + cell.col + 1 |
| ) |
| xcell.value, fmt = self._value_with_fmt(cell.val) |
| if fmt: |
| xcell.number_format = fmt |
|
|
| style_kwargs: dict[str, Serialisable] | None = {} |
| if cell.style: |
| key = str(cell.style) |
| style_kwargs = _style_cache.get(key) |
| if style_kwargs is None: |
| style_kwargs = self._convert_to_style_kwargs(cell.style) |
| _style_cache[key] = style_kwargs |
|
|
| if style_kwargs: |
| for k, v in style_kwargs.items(): |
| setattr(xcell, k, v) |
|
|
| if cell.mergestart is not None and cell.mergeend is not None: |
| wks.merge_cells( |
| start_row=startrow + cell.row + 1, |
| start_column=startcol + cell.col + 1, |
| end_column=startcol + cell.mergeend + 1, |
| end_row=startrow + cell.mergestart + 1, |
| ) |
|
|
| |
| |
| |
| if style_kwargs: |
| first_row = startrow + cell.row + 1 |
| last_row = startrow + cell.mergestart + 1 |
| first_col = startcol + cell.col + 1 |
| last_col = startcol + cell.mergeend + 1 |
|
|
| for row in range(first_row, last_row + 1): |
| for col in range(first_col, last_col + 1): |
| if row == first_row and col == first_col: |
| |
| continue |
| xcell = wks.cell(column=col, row=row) |
| for k, v in style_kwargs.items(): |
| setattr(xcell, k, v) |
|
|
|
|
| class OpenpyxlReader(BaseExcelReader["Workbook"]): |
| @doc(storage_options=_shared_docs["storage_options"]) |
| def __init__( |
| self, |
| filepath_or_buffer: FilePath | ReadBuffer[bytes], |
| storage_options: StorageOptions | None = None, |
| engine_kwargs: dict | None = None, |
| ) -> None: |
| """ |
| Reader using openpyxl engine. |
| |
| Parameters |
| ---------- |
| filepath_or_buffer : str, path object or Workbook |
| Object to be parsed. |
| {storage_options} |
| engine_kwargs : dict, optional |
| Arbitrary keyword arguments passed to excel engine. |
| """ |
| import_optional_dependency("openpyxl") |
| super().__init__( |
| filepath_or_buffer, |
| storage_options=storage_options, |
| engine_kwargs=engine_kwargs, |
| ) |
|
|
| @property |
| def _workbook_class(self) -> type[Workbook]: |
| from openpyxl import Workbook |
|
|
| return Workbook |
|
|
| def load_workbook( |
| self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs |
| ) -> Workbook: |
| from openpyxl import load_workbook |
|
|
| default_kwargs = {"read_only": True, "data_only": True, "keep_links": False} |
|
|
| return load_workbook( |
| filepath_or_buffer, |
| **(default_kwargs | engine_kwargs), |
| ) |
|
|
| @property |
| def sheet_names(self) -> list[str]: |
| return [sheet.title for sheet in self.book.worksheets] |
|
|
| def get_sheet_by_name(self, name: str): |
| self.raise_if_bad_sheet_by_name(name) |
| return self.book[name] |
|
|
| def get_sheet_by_index(self, index: int): |
| self.raise_if_bad_sheet_by_index(index) |
| return self.book.worksheets[index] |
|
|
| def _convert_cell(self, cell) -> Scalar: |
| from openpyxl.cell.cell import ( |
| TYPE_ERROR, |
| TYPE_NUMERIC, |
| ) |
|
|
| if cell.value is None: |
| return "" |
| elif cell.data_type == TYPE_ERROR: |
| return np.nan |
| elif cell.data_type == TYPE_NUMERIC: |
| val = int(cell.value) |
| if val == cell.value: |
| return val |
| return float(cell.value) |
|
|
| return cell.value |
|
|
| def get_sheet_data( |
| self, sheet, file_rows_needed: int | None = None |
| ) -> list[list[Scalar]]: |
| if self.book.read_only: |
| sheet.reset_dimensions() |
|
|
| data: list[list[Scalar]] = [] |
| last_row_with_data = -1 |
| for row_number, row in enumerate(sheet.rows): |
| converted_row = [self._convert_cell(cell) for cell in row] |
| while converted_row and converted_row[-1] == "": |
| |
| converted_row.pop() |
| if converted_row: |
| last_row_with_data = row_number |
| data.append(converted_row) |
| if file_rows_needed is not None and len(data) >= file_rows_needed: |
| break |
|
|
| |
| data = data[: last_row_with_data + 1] |
|
|
| if len(data) > 0: |
| |
| max_width = max(len(data_row) for data_row in data) |
| if min(len(data_row) for data_row in data) < max_width: |
| empty_cell: list[Scalar] = [""] |
| data = [ |
| data_row + (max_width - len(data_row)) * empty_cell |
| for data_row in data |
| ] |
|
|
| return data |
|
|