Spaces:
Sleeping
Sleeping
Update ord.py
Browse files
ord.py
CHANGED
|
@@ -11,6 +11,18 @@ from docx.enum.section import WD_SECTION
|
|
| 11 |
from docx.oxml import parse_xml
|
| 12 |
from docx.oxml.ns import nsdecls
|
| 13 |
from docx.oxml.shared import OxmlElement, qn
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
THEME_COLOR_HEX = "5FFFDF" # Hex version for XML elements
|
| 16 |
THEME_COLOR = RGBColor.from_string(THEME_COLOR_HEX)
|
|
@@ -1468,7 +1480,23 @@ def process_excel_to_word(excel_file_path, output_word_path, display_name=None,
|
|
| 1468 |
# Read the Excel file
|
| 1469 |
xls = pd.ExcelFile(excel_file_path)
|
| 1470 |
first_sheet_name = xls.sheet_names[0] # Get the first sheet name
|
| 1471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1472 |
|
| 1473 |
# Extract display name if not provided
|
| 1474 |
if display_name is None:
|
|
@@ -1519,12 +1547,19 @@ def process_excel_to_word(excel_file_path, output_word_path, display_name=None,
|
|
| 1519 |
modules_data = {}
|
| 1520 |
xls = pd.ExcelFile(excel_file_path)
|
| 1521 |
|
| 1522 |
-
|
| 1523 |
-
|
| 1524 |
-
|
| 1525 |
-
|
| 1526 |
-
|
| 1527 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1528 |
|
| 1529 |
# Clean column names
|
| 1530 |
questions_df.columns = questions_df.columns.str.strip()
|
|
@@ -1532,6 +1567,10 @@ def process_excel_to_word(excel_file_path, output_word_path, display_name=None,
|
|
| 1532 |
# Create Word document
|
| 1533 |
doc = Document()
|
| 1534 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1535 |
core_props = doc.core_properties
|
| 1536 |
core_props.author = "Natural Killer"
|
| 1537 |
core_props.title = "Manhattan Project"
|
|
|
|
| 11 |
from docx.oxml import parse_xml
|
| 12 |
from docx.oxml.ns import nsdecls
|
| 13 |
from docx.oxml.shared import OxmlElement, qn
|
| 14 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 15 |
+
from functools import lru_cache
|
| 16 |
+
from concurrent.futures import ThreadPoolExecutor
|
| 17 |
+
import threading
|
| 18 |
+
|
| 19 |
+
# Thread pool for I/O operations
|
| 20 |
+
_io_executor = ThreadPoolExecutor(max_workers=4)
|
| 21 |
+
|
| 22 |
+
@lru_cache(maxsize=128)
def _read_excel_sheet_cached(file_path, sheet_name):
    """Parse one sheet with pandas, memoised per ``(file_path, sheet_name)``."""
    return pd.read_excel(file_path, sheet_name=sheet_name)


def _cached_excel_read(file_path, sheet_name):
    """Read a sheet from an Excel file, reusing an earlier parse when possible.

    The parsed result is memoised (up to 128 distinct argument pairs), but
    each call returns a *copy*, so a caller that mutates the returned frame
    in place (renaming columns, dropping rows, ...) cannot corrupt what later
    callers get from the cache.

    Args:
        file_path: Path of the workbook; must be hashable because it is part
            of the cache key.
        sheet_name: Sheet to load, passed straight to ``pd.read_excel``; must
            be hashable as well (a list of sheets raises ``TypeError``).

    Returns:
        Whatever ``pd.read_excel`` produced for these arguments: a DataFrame,
        or a dict of DataFrames when ``sheet_name`` is ``None``.

    Raises:
        Any exception raised by ``pd.read_excel``; failed reads are not
        cached, so the next call tries again.

    Note:
        The cache key is only the two arguments. If the file changes on disk
        the old parse keeps being served until
        ``_cached_excel_read.cache_clear()`` is called.
    """
    parsed = _read_excel_sheet_cached(file_path, sheet_name)
    if isinstance(parsed, dict):
        # ``sheet_name=None`` yields {sheet: DataFrame}; copy every frame.
        return {name: frame.copy() for name, frame in parsed.items()}
    return parsed.copy()


# Keep the ``functools.lru_cache`` helpers reachable under the original name.
_cached_excel_read.cache_info = _read_excel_sheet_cached.cache_info
_cached_excel_read.cache_clear = _read_excel_sheet_cached.cache_clear
|
| 26 |
|
| 27 |
THEME_COLOR_HEX = "5FFFDF" # Hex version for XML elements
|
| 28 |
THEME_COLOR = RGBColor.from_string(THEME_COLOR_HEX)
|
|
|
|
| 1480 |
# Read the Excel file
|
| 1481 |
xls = pd.ExcelFile(excel_file_path)
|
| 1482 |
first_sheet_name = xls.sheet_names[0] # Get the first sheet name
|
| 1483 |
+
all_sheets = xls.sheet_names
|
| 1484 |
+
|
| 1485 |
+
with ThreadPoolExecutor(max_workers=min(len(all_sheets), 8)) as executor:
|
| 1486 |
+
future_to_sheet = {
|
| 1487 |
+
executor.submit(pd.read_excel, excel_file_path, sheet_name=sheet): sheet
|
| 1488 |
+
for sheet in all_sheets[:10] # Limit to first 10 sheets
|
| 1489 |
+
}
|
| 1490 |
+
|
| 1491 |
+
sheets_data = {}
|
| 1492 |
+
for future in future_to_sheet:
|
| 1493 |
+
sheet = future_to_sheet[future]
|
| 1494 |
+
try:
|
| 1495 |
+
sheets_data[sheet] = future.result()
|
| 1496 |
+
except Exception as e:
|
| 1497 |
+
print(f"Error reading {sheet}: {e}")
|
| 1498 |
+
|
| 1499 |
+
questions_df = sheets_data[first_sheet_name]
|
| 1500 |
|
| 1501 |
# Extract display name if not provided
|
| 1502 |
if display_name is None:
|
|
|
|
| 1547 |
modules_data = {}
|
| 1548 |
xls = pd.ExcelFile(excel_file_path)
|
| 1549 |
|
| 1550 |
+
# Parallel module reading
|
| 1551 |
+
with ThreadPoolExecutor(max_workers=min(len(modules), 8)) as executor:
|
| 1552 |
+
future_to_module = {
|
| 1553 |
+
executor.submit(read_course_titles_from_module_sheet, excel_file_path, module): module
|
| 1554 |
+
for module in modules
|
| 1555 |
+
}
|
| 1556 |
+
|
| 1557 |
+
for future in future_to_module:
|
| 1558 |
+
module = future_to_module[future]
|
| 1559 |
+
try:
|
| 1560 |
+
modules_data[module] = future.result()
|
| 1561 |
+
except Exception as e:
|
| 1562 |
+
print(f"DEBUG: Error reading module '{module}': {e}")
|
| 1563 |
|
| 1564 |
# Clean column names
|
| 1565 |
questions_df.columns = questions_df.columns.str.strip()
|
|
|
|
| 1567 |
# Create Word document
|
| 1568 |
doc = Document()
|
| 1569 |
|
| 1570 |
+
# CRITICAL: Disable slow style updates
|
| 1571 |
+
doc.settings.update_fields_on_open = False
|
| 1572 |
+
doc.settings.odd_and_even_pages_header_footer = False
|
| 1573 |
+
|
| 1574 |
core_props = doc.core_properties
|
| 1575 |
core_props.author = "Natural Killer"
|
| 1576 |
core_props.title = "Manhattan Project"
|