TiH0 committed on
Commit
73db34a
·
verified ·
1 Parent(s): 6f2a3be

Update ord.py

Browse files
Files changed (1) hide show
  1. ord.py +46 -7
ord.py CHANGED
@@ -11,6 +11,18 @@ from docx.enum.section import WD_SECTION
11
  from docx.oxml import parse_xml
12
  from docx.oxml.ns import nsdecls
13
  from docx.oxml.shared import OxmlElement, qn
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  THEME_COLOR_HEX = "5FFFDF" # Hex version for XML elements
16
  THEME_COLOR = RGBColor.from_string(THEME_COLOR_HEX)
@@ -1468,7 +1480,23 @@ def process_excel_to_word(excel_file_path, output_word_path, display_name=None,
1468
  # Read the Excel file
1469
  xls = pd.ExcelFile(excel_file_path)
1470
  first_sheet_name = xls.sheet_names[0] # Get the first sheet name
1471
- questions_df = pd.read_excel(excel_file_path, sheet_name=first_sheet_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1472
 
1473
  # Extract display name if not provided
1474
  if display_name is None:
@@ -1519,12 +1547,19 @@ def process_excel_to_word(excel_file_path, output_word_path, display_name=None,
1519
  modules_data = {}
1520
  xls = pd.ExcelFile(excel_file_path)
1521
 
1522
- for module in modules:
1523
- try:
1524
- cours_titles_for_module = read_course_titles_from_module_sheet(excel_file_path, module)
1525
- modules_data[module] = cours_titles_for_module
1526
- except Exception as e:
1527
- print(f"DEBUG: Error reading module '{module}': {e}")
 
 
 
 
 
 
 
1528
 
1529
  # Clean column names
1530
  questions_df.columns = questions_df.columns.str.strip()
@@ -1532,6 +1567,10 @@ def process_excel_to_word(excel_file_path, output_word_path, display_name=None,
1532
  # Create Word document
1533
  doc = Document()
1534
 
 
 
 
 
1535
  core_props = doc.core_properties
1536
  core_props.author = "Natural Killer"
1537
  core_props.title = "Manhattan Project"
 
11
  from docx.oxml import parse_xml
12
  from docx.oxml.ns import nsdecls
13
  from docx.oxml.shared import OxmlElement, qn
14
+ from concurrent.futures import ThreadPoolExecutor
15
+ from functools import lru_cache
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ import threading
18
+
19
+ # Thread pool for I/O operations
20
+ _io_executor = ThreadPoolExecutor(max_workers=4)
21
+
22
+ @lru_cache(maxsize=128)
23
+ def _cached_excel_read(file_path, sheet_name):
24
+ """Cache Excel reads"""
25
+ return pd.read_excel(file_path, sheet_name=sheet_name)
26
 
27
  THEME_COLOR_HEX = "5FFFDF" # Hex version for XML elements
28
  THEME_COLOR = RGBColor.from_string(THEME_COLOR_HEX)
 
1480
  # Read the Excel file
1481
  xls = pd.ExcelFile(excel_file_path)
1482
  first_sheet_name = xls.sheet_names[0] # Get the first sheet name
1483
+ all_sheets = xls.sheet_names
1484
+
1485
+ with ThreadPoolExecutor(max_workers=min(len(all_sheets), 8)) as executor:
1486
+ future_to_sheet = {
1487
+ executor.submit(pd.read_excel, excel_file_path, sheet_name=sheet): sheet
1488
+ for sheet in all_sheets[:10] # Limit to first 10 sheets
1489
+ }
1490
+
1491
+ sheets_data = {}
1492
+ for future in future_to_sheet:
1493
+ sheet = future_to_sheet[future]
1494
+ try:
1495
+ sheets_data[sheet] = future.result()
1496
+ except Exception as e:
1497
+ print(f"Error reading {sheet}: {e}")
1498
+
1499
+ questions_df = sheets_data[first_sheet_name]
1500
 
1501
  # Extract display name if not provided
1502
  if display_name is None:
 
1547
  modules_data = {}
1548
  xls = pd.ExcelFile(excel_file_path)
1549
 
1550
+ # Parallel module reading
1551
+ with ThreadPoolExecutor(max_workers=min(len(modules), 8)) as executor:
1552
+ future_to_module = {
1553
+ executor.submit(read_course_titles_from_module_sheet, excel_file_path, module): module
1554
+ for module in modules
1555
+ }
1556
+
1557
+ for future in future_to_module:
1558
+ module = future_to_module[future]
1559
+ try:
1560
+ modules_data[module] = future.result()
1561
+ except Exception as e:
1562
+ print(f"DEBUG: Error reading module '{module}': {e}")
1563
 
1564
  # Clean column names
1565
  questions_df.columns = questions_df.columns.str.strip()
 
1567
  # Create Word document
1568
  doc = Document()
1569
 
1570
+ # CRITICAL: Disable slow style updates
1571
+ doc.settings.update_fields_on_open = False
1572
+ doc.settings.odd_and_even_pages_header_footer = False
1573
+
1574
  core_props = doc.core_properties
1575
  core_props.author = "Natural Killer"
1576
  core_props.title = "Manhattan Project"