Spaces:
Sleeping
Sleeping
| import { docs_v1 } from 'googleapis'; | |
| import * as GDocsHelpers from '../../googleDocsApiHelpers.js'; | |
/** One cell of a table, with its position, document index range, and flattened text. */
| export interface ExtractedTableCell { | |
/** Zero-based position of the cell's row within the table. */
| rowIndex: number; | |
/** Zero-based position of the cell within its row. */
| columnIndex: number; | |
/** Start index reported on the cell itself, or null when the cell carries none. */
| startIndex: number | null; | |
/** End index reported on the cell itself, or null when the cell carries none. */
| endIndex: number | null; | |
/** Smallest start index among the cell's paragraph elements (nested tables included), or null if none has one. */
| contentStartIndex: number | null; | |
/** Largest end index among the cell's paragraph elements (nested tables included), or null if none has one. */
| contentEndIndex: number | null; | |
/** Cell text: non-empty paragraph and nested-cell texts joined with newlines, then trimmed. */
| text: string; | |
| } | |
/** A table found among the top-level content elements of the document body or a tab. */
| export interface ExtractedTable { | |
/** Synthetic identifier of the form `table:<tabId or "body">:<ordinal>`. */
| tableId: string; | |
/** Zero-based count of tables that precede this one in the scanned content. */
| ordinal: number; | |
/** Start index of the structural element holding the table, or null when absent. */
| startIndex: number | null; | |
/** End index of the structural element holding the table, or null when absent. */
| endIndex: number | null; | |
/** Number of rows in the table. */
| rowCount: number; | |
/** Number of cells in the table's widest row. */
| columnCount: number; | |
/** Every cell of the table, row by row and left to right within each row. */
| cells: ExtractedTableCell[]; | |
| } | |
/** A heading paragraph whose text matched one of the requested headings. */
| export interface ExtractedHeading { | |
/** Trimmed text of the heading paragraph. */
| headingText: string; | |
/** The paragraph's named style type; always a value starting with `HEADING_`. */
| headingLevel: string; | |
/** Start index of the heading's structural element, or null when absent. */
| startIndex: number | null; | |
/** End index of the heading's structural element, or null when absent. */
| endIndex: number | null; | |
/** Id of the first table after this heading, if one appears before the next heading paragraph. */
| tableIdFollowing?: string; | |
| } | |
/** Width information for one table column, taken from the table's column properties. */
| export interface ExtractedTableColumnStyle { | |
/** Zero-based position of the entry in the table's column-properties list. */
| columnIndex: number; | |
/** Column width in points; undefined when the width is not available as a PT dimension. */
| widthPt?: number; | |
/** The column's width type, copied through unchanged from the source document. */
| widthType?: string | null; | |
| } | |
/** Style attributes of one table row; recorded only for rows that carry a row style. */
| export interface ExtractedTableRowStyle { | |
/** Zero-based position of the row within the table. */
| rowIndex: number; | |
/** Minimum row height in points; undefined when not available as a PT dimension. */
| minRowHeightPt?: number; | |
/** The row style's `preventOverflow` flag; undefined when the source value is null or missing. */
| preventOverflow?: boolean; | |
/** The row style's `tableHeader` flag; undefined when the source value is null or missing. */
| tableHeader?: boolean; | |
| } | |
/** Style attributes of one table cell, plus a flag for bold text inside it. */
| export interface ExtractedTableCellStyle { | |
/** Zero-based position of the cell's row within the table. */
| rowIndex: number; | |
/** Zero-based position of the cell within its row. */
| columnIndex: number; | |
/** RGB background colour of the cell, when the cell style defines one. */
| backgroundColor?: docs_v1.Schema$RgbColor; | |
/** Vertical content alignment; null when the source value is not one of the listed literals. */
| contentAlignment?: 'CONTENT_ALIGNMENT_UNSPECIFIED' | 'TOP' | 'MIDDLE' | 'BOTTOM' | null; | |
/** Top padding in points; undefined when not available as a PT dimension. */
| paddingTopPt?: number; | |
/** Bottom padding in points; undefined when not available as a PT dimension. */
| paddingBottomPt?: number; | |
/** Left padding in points; undefined when not available as a PT dimension. */
| paddingLeftPt?: number; | |
/** Right padding in points; undefined when not available as a PT dimension. */
| paddingRightPt?: number; | |
/** Top border definition, copied through from the cell style. */
| borderTop?: docs_v1.Schema$TableCellBorder; | |
/** Bottom border definition, copied through from the cell style. */
| borderBottom?: docs_v1.Schema$TableCellBorder; | |
/** Left border definition, copied through from the cell style. */
| borderLeft?: docs_v1.Schema$TableCellBorder; | |
/** Right border definition, copied through from the cell style. */
| borderRight?: docs_v1.Schema$TableCellBorder; | |
/** True when any text run in the cell's own paragraphs is bold; left undefined otherwise. */
| hasBoldText?: boolean; | |
| } | |
/** Text and styling of a single table, captured together. */
| export interface ExtractedTableSnapshot { | |
/** Synthetic identifier of the form `table:<tabId or "body">:<ordinal>`. */
| tableId: string; | |
/** Start index of the structural element holding the table, or null when absent. */
| startIndex: number | null; | |
/** End index of the structural element holding the table, or null when absent. */
| endIndex: number | null; | |
/** Row count reported by the table, falling back to the number of extracted rows. */
| rowCount: number; | |
/** Column count reported by the table, falling back to the length of the longest extracted row. */
| columnCount: number; | |
/** Cell text indexed as `data[rowIndex][columnIndex]`. */
| data: string[][]; | |
/** One entry per item in the table's column-properties list. */
| columnStyles: ExtractedTableColumnStyle[]; | |
/** One entry per row that carries a row style. */
| rowStyles: ExtractedTableRowStyle[]; | |
/** One entry per cell that has a cell style or contains bold text. */
| cellStyles: ExtractedTableCellStyle[]; | |
/** Number of consecutive rows, counted from the top, that are flagged as table headers. */
| pinnedHeaderRowsCount: number; | |
| } | |
/**
 * Selects the list of structural elements that the extractors should scan.
 *
 * @param doc - Document to read from.
 * @param tabId - Optional tab to read; when given, only that tab is considered.
 * @returns The requested tab's body content (empty if the tab or its content
 *   is missing). Without a tab id: `doc.body.content` if present, otherwise
 *   the first tab's body content, otherwise an empty list.
 */
function getContentSource(
  doc: docs_v1.Schema$Document,
  tabId?: string
): docs_v1.Schema$StructuralElement[] {
  if (!tabId) {
    // No explicit tab: prefer the document-level body, then the first tab.
    return doc.body?.content ?? doc.tabs?.[0]?.documentTab?.body?.content ?? [];
  }

  // An explicit tab never falls back to other content.
  const requestedTab = GDocsHelpers.findTabById(doc, tabId);
  return requestedTab?.documentTab?.body?.content ?? [];
}
/**
 * Concatenates the text-run content of a paragraph.
 *
 * Elements without a text run contribute nothing. Trailing newline characters
 * are removed from the combined text.
 *
 * @param paragraph - Paragraph to read; may be omitted.
 * @returns The paragraph's text, or `''` when there is no paragraph or it has
 *   no elements.
 */
function extractParagraphText(paragraph?: docs_v1.Schema$Paragraph): string {
  const pieces = paragraph?.elements;
  if (!pieces) return '';

  let combined = '';
  for (const piece of pieces) {
    combined += piece.textRun?.content ?? '';
  }
  return combined.replace(/\n+$/g, '');
}
/**
 * Flattens the content of a table cell into plain text.
 *
 * Paragraphs and the cells of nested tables are visited in document order;
 * each non-empty piece of text becomes one line of the result.
 *
 * @param content - Structural elements of the cell (defaults to none).
 * @returns The collected lines joined with `\n`, with surrounding whitespace
 *   trimmed.
 */
function extractCellText(content: docs_v1.Schema$StructuralElement[] = []): string {
  const lines: string[] = [];
  const keepIfNonEmpty = (value: string): void => {
    if (value) lines.push(value);
  };

  for (const block of content) {
    if (block.paragraph) {
      keepIfNonEmpty(extractParagraphText(block.paragraph));
    }

    const nestedRows = block.table?.tableRows;
    if (!nestedRows) continue;

    // Nested tables are flattened cell by cell through recursion.
    for (const nestedRow of nestedRows) {
      for (const nestedCell of nestedRow.tableCells ?? []) {
        keepIfNonEmpty(extractCellText(nestedCell.content ?? []));
      }
    }
  }

  return lines.join('\n').trim();
}
/**
 * Computes the index span covered by the paragraph elements inside a cell.
 *
 * Every paragraph element in the cell is considered, including those inside
 * tables nested at any depth.
 *
 * @param content - Structural elements of the cell (defaults to none).
 * @returns The smallest numeric `startIndex` and the largest numeric
 *   `endIndex` found; each is `null` when no element supplies a number.
 */
function extractCellContentRange(content: docs_v1.Schema$StructuralElement[] = []): {
  contentStartIndex: number | null;
  contentEndIndex: number | null;
} {
  let lowestStart: number | null = null;
  let highestEnd: number | null = null;

  // Explicit work list instead of recursion; min/max do not depend on visit order.
  const pending: docs_v1.Schema$StructuralElement[][] = [content];
  for (let batch = pending.pop(); batch !== undefined; batch = pending.pop()) {
    for (const block of batch) {
      for (const piece of block.paragraph?.elements ?? []) {
        const from = piece.startIndex;
        if (typeof from === 'number') {
          lowestStart = lowestStart === null ? from : Math.min(lowestStart, from);
        }
        const to = piece.endIndex;
        if (typeof to === 'number') {
          highestEnd = highestEnd === null ? to : Math.max(highestEnd, to);
        }
      }

      for (const nestedRow of block.table?.tableRows ?? []) {
        for (const nestedCell of nestedRow.tableCells ?? []) {
          pending.push(nestedCell.content ?? []);
        }
      }
    }
  }

  return { contentStartIndex: lowestStart, contentEndIndex: highestEnd };
}
/**
 * Reads a dimension as a number of points.
 *
 * An explicit magnitude of `0` is a valid measurement (for example zero cell
 * padding) and is returned as `0`; it is not treated as "missing".
 *
 * @param dimension - Dimension to convert; may be omitted.
 * @returns The magnitude when the unit is `'PT'` and the magnitude is a
 *   number; `undefined` for a missing dimension, any other unit, or a
 *   missing/non-numeric magnitude.
 */
function dimensionToPt(dimension?: docs_v1.Schema$Dimension): number | undefined {
  if (!dimension || dimension.unit !== 'PT') return undefined;

  const { magnitude } = dimension;
  // Test the type, not truthiness, so that 0 survives.
  // NOTE(review): a PT dimension with no magnitude at all still yields
  // undefined; confirm whether the API omits zero magnitudes before mapping
  // that case to 0.
  if (typeof magnitude !== 'number' || Number.isNaN(magnitude)) return undefined;
  return magnitude;
}
/**
 * Builds the style record for a single table cell.
 *
 * @param rowIndex - Row position to store in the record.
 * @param columnIndex - Column position to store in the record.
 * @param cell - Source table cell.
 * @returns `null` when the cell has no cell style and no bold text run in any
 *   of its own paragraphs; otherwise a record with the style fields copied or
 *   converted to points, and `hasBoldText` set to `true` only when a bold run
 *   was found.
 */
function normalizeCellStyle(
  rowIndex: number,
  columnIndex: number,
  cell: docs_v1.Schema$TableCell
): ExtractedTableCellStyle | null {
  const cellStyle = cell.tableCellStyle;

  // Look through every paragraph directly inside the cell for a bold text run.
  let containsBoldRun = false;
  scan: for (const block of cell.content ?? []) {
    for (const piece of block.paragraph?.elements ?? []) {
      if (piece.textRun?.textStyle?.bold) {
        containsBoldRun = true;
        break scan;
      }
    }
  }

  if (!cellStyle && !containsBoldRun) return null;

  // Only the known alignment literals pass through; anything else becomes null.
  let contentAlignment: ExtractedTableCellStyle['contentAlignment'] = null;
  const rawAlignment = cellStyle?.contentAlignment;
  switch (rawAlignment) {
    case 'TOP':
    case 'MIDDLE':
    case 'BOTTOM':
    case 'CONTENT_ALIGNMENT_UNSPECIFIED':
      contentAlignment = rawAlignment;
      break;
    default:
      break;
  }

  return {
    rowIndex,
    columnIndex,
    backgroundColor: cellStyle?.backgroundColor?.color?.rgbColor ?? undefined,
    contentAlignment,
    paddingTopPt: dimensionToPt(cellStyle?.paddingTop),
    paddingBottomPt: dimensionToPt(cellStyle?.paddingBottom),
    paddingLeftPt: dimensionToPt(cellStyle?.paddingLeft),
    paddingRightPt: dimensionToPt(cellStyle?.paddingRight),
    borderTop: cellStyle?.borderTop ?? undefined,
    borderBottom: cellStyle?.borderBottom ?? undefined,
    borderLeft: cellStyle?.borderLeft ?? undefined,
    borderRight: cellStyle?.borderRight ?? undefined,
    hasBoldText: containsBoldRun ? true : undefined,
  };
}
/**
 * Lists the tables found among the top-level content elements of the document
 * body or of a specific tab.
 *
 * Tables are numbered from zero in the order they appear, and each receives
 * the id `table:<tabId or "body">:<ordinal>`.
 *
 * @param doc - Document to scan.
 * @param tabId - Optional tab whose content should be scanned.
 * @returns One entry per table, including every cell's indices and text.
 */
export function extractDocumentTables(
  doc: docs_v1.Schema$Document,
  tabId?: string
): ExtractedTable[] {
  const idPrefix = `table:${tabId ?? 'body'}:`;
  const found: ExtractedTable[] = [];

  for (const block of getContentSource(doc, tabId)) {
    const rows = block.table?.tableRows;
    if (!rows) continue;

    const ordinal = found.length;
    const cells: ExtractedTableCell[] = [];
    let widestRow = 0;

    for (const [rowIndex, row] of rows.entries()) {
      const rowCells = row.tableCells ?? [];
      if (rowCells.length > widestRow) widestRow = rowCells.length;

      for (const [columnIndex, cell] of rowCells.entries()) {
        const cellContent = cell.content ?? [];
        const span = extractCellContentRange(cellContent);
        cells.push({
          rowIndex,
          columnIndex,
          startIndex: cell.startIndex ?? null,
          endIndex: cell.endIndex ?? null,
          contentStartIndex: span.contentStartIndex,
          contentEndIndex: span.contentEndIndex,
          text: extractCellText(cellContent),
        });
      }
    }

    found.push({
      tableId: `${idPrefix}${ordinal}`,
      ordinal,
      startIndex: block.startIndex ?? null,
      endIndex: block.endIndex ?? null,
      rowCount: rows.length,
      columnCount: widestRow,
      cells,
    });
  }

  return found;
}
/**
 * Looks up one extracted table by its synthetic id.
 *
 * @param doc - Document to scan.
 * @param tableId - Id to match against each extracted table's `tableId`.
 * @param tabId - Optional tab whose content should be scanned.
 * @returns The first table with that id, or `null` when none matches.
 */
export function getTableById(
  doc: docs_v1.Schema$Document,
  tableId: string,
  tabId?: string
): ExtractedTable | null {
  for (const candidate of extractDocumentTables(doc, tabId)) {
    if (candidate.tableId === tableId) return candidate;
  }
  return null;
}
/**
 * Finds the table that starts at or after a given document index and is
 * closest to it.
 *
 * Tables without a numeric start index, and tables that start before
 * `insertionIndex`, are ignored. When several tables share the smallest
 * start index, the one that appears first in the document wins.
 *
 * @param doc - Document to scan.
 * @param insertionIndex - Lower bound (inclusive) for the table's start index.
 * @param tabId - Optional tab whose content should be scanned.
 * @returns The matching table, or `null` when there is none.
 */
export function findTableNearestStartIndex(
  doc: docs_v1.Schema$Document,
  insertionIndex: number,
  tabId?: string
): ExtractedTable | null {
  let closest: ExtractedTable | null = null;
  let closestStart = 0;

  for (const candidate of extractDocumentTables(doc, tabId)) {
    const start = candidate.startIndex;
    // Written as a negated >= so NaN on either side is rejected as well.
    if (typeof start !== 'number' || !(start >= insertionIndex)) continue;

    if (closest === null || start < closestStart) {
      closest = candidate;
      closestStart = start;
    }
  }

  return closest;
}
/**
 * Captures the text and styling of the table with the given id.
 *
 * Tables are numbered in the same way as `extractDocumentTables` does it:
 * in order of appearance among the top-level content elements, with ids of
 * the form `table:<tabId or "body">:<ordinal>`.
 *
 * @param doc - Document to scan.
 * @param tableId - Id of the table to capture.
 * @param tabId - Optional tab whose content should be scanned.
 * @returns The snapshot, or `null` when no table has that id.
 */
export function extractTableSnapshot(
  doc: docs_v1.Schema$Document,
  tableId: string,
  tabId?: string
): ExtractedTableSnapshot | null {
  const idPrefix = `table:${tabId ?? 'body'}:`;
  let nextOrdinal = 0;

  for (const block of getContentSource(doc, tabId)) {
    const table = block.table;
    const rows = table?.tableRows;
    if (!table || !rows) continue;

    const candidateId = `${idPrefix}${nextOrdinal}`;
    nextOrdinal += 1;
    if (candidateId !== tableId) continue;

    const data: string[][] = [];
    const rowStyles: ExtractedTableRowStyle[] = [];
    const cellStyles: ExtractedTableCellStyle[] = [];
    let pinnedHeaderRowsCount = 0;

    for (const [rowIndex, row] of rows.entries()) {
      const style = row.tableRowStyle;
      if (style) {
        rowStyles.push({
          rowIndex,
          minRowHeightPt: dimensionToPt(style.minRowHeight),
          preventOverflow: style.preventOverflow ?? undefined,
          tableHeader: style.tableHeader ?? undefined,
        });
      }

      // Only an unbroken run of header rows starting at row 0 counts as pinned.
      const flaggedAsHeader = style?.tableHeader ?? false;
      if (flaggedAsHeader && pinnedHeaderRowsCount === rowIndex) {
        pinnedHeaderRowsCount += 1;
      }

      const rowTexts: string[] = [];
      for (const [columnIndex, cell] of (row.tableCells ?? []).entries()) {
        rowTexts.push(extractCellText(cell.content ?? []));
        const normalized = normalizeCellStyle(rowIndex, columnIndex, cell);
        if (normalized) cellStyles.push(normalized);
      }
      data.push(rowTexts);
    }

    const columnProperties = table.tableStyle?.tableColumnProperties ?? [];
    const columnStyles: ExtractedTableColumnStyle[] = columnProperties.map(
      (column, columnIndex) => ({
        columnIndex,
        widthPt: dimensionToPt(column.width),
        widthType: column.widthType,
      })
    );

    return {
      tableId: candidateId,
      startIndex: block.startIndex ?? null,
      endIndex: block.endIndex ?? null,
      rowCount: table.rows ?? data.length,
      // Fall back to the longest extracted row when the table reports no column count.
      columnCount:
        table.columns ?? data.reduce((widest, rowTexts) => Math.max(widest, rowTexts.length), 0),
      data,
      columnStyles,
      rowStyles,
      cellStyles,
      pinnedHeaderRowsCount,
    };
  }

  return null;
}
/**
 * Finds heading paragraphs whose trimmed text equals one of the requested
 * headings, and links each to the table that follows it.
 *
 * Only paragraphs whose named style type starts with `HEADING_` are
 * considered. For each match, the following elements are scanned until a
 * table or another heading paragraph is reached; a table reached first is
 * reported through `tableIdFollowing`.
 *
 * @param doc - Document to scan.
 * @param headings - Heading texts to look for; each is trimmed before comparison.
 * @param tabId - Optional tab whose content should be scanned.
 * @returns Matching headings in document order.
 */
export function findHeadings(
  doc: docs_v1.Schema$Document,
  headings: string[],
  tabId?: string
): ExtractedHeading[] {
  const content = getContentSource(doc, tabId);
  const tables = extractDocumentTables(doc, tabId);

  const wanted = new Set<string>();
  for (const heading of headings) wanted.add(heading.trim());

  const isHeadingStyle = (styleName?: string | null): styleName is string =>
    !!styleName && styleName.startsWith('HEADING_');

  const matches: ExtractedHeading[] = [];
  // Tables passed so far; this is also the ordinal of the next table to come.
  let tablesPassed = 0;

  for (const [position, element] of content.entries()) {
    if (element.table?.tableRows) {
      tablesPassed += 1;
      continue;
    }

    const styleName = element.paragraph?.paragraphStyle?.namedStyleType;
    if (!isHeadingStyle(styleName)) continue;

    const headingText = extractParagraphText(element.paragraph).trim();
    if (!wanted.has(headingText)) continue;

    let tableIdFollowing: string | undefined;
    let cursor = position + 1;
    while (cursor < content.length) {
      const later = content[cursor];
      cursor += 1;
      if (later.table?.tableRows) {
        tableIdFollowing = tables[tablesPassed]?.tableId;
        break;
      }
      // The next heading ends this heading's section without a table.
      if (isHeadingStyle(later.paragraph?.paragraphStyle?.namedStyleType)) break;
    }

    matches.push({
      headingText,
      headingLevel: styleName,
      startIndex: element.startIndex ?? null,
      endIndex: element.endIndex ?? null,
      tableIdFollowing,
    });
  }

  return matches;
}