MikeMai committed on
Commit
c61bff0
·
verified ·
1 Parent(s): dc7b3c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -920,9 +920,11 @@ def extract_po(docx_path):
920
  # Step 1: Extract XML content from DOCX
921
  print("Extracting Docs data to XML...")
922
  xml_filename = os.path.splitext(os.path.basename(docx_path))[0] + "_document.xml"
923
- xml_file = extract_docx_as_xml(docx_bytes, save_xml=False, xml_filename=xml_filename)
924
-
925
- get_namespace(ET.fromstring(xml_file))
 
 
926
 
927
  # Step 2: Extract tables from DOCX and save JSON
928
  print("Extracting XML data to JSON...")
@@ -974,7 +976,7 @@ def extract_po(docx_path):
974
  # Example Usage
975
 
976
  # extract_po("test-contract-converted.docx")
977
- # extract_po("test-contracts\GN-SMBLMCD202501-032WJ SMB联盟菜地PVC球阀等五金物资采购合同-ZHUOKE.docx")
978
 
979
  # print(extract_price_list([{'序号 No.': '1', '名称 Name': 'PE波纹管(双壁波纹管) PE corrugated pipe (double wall corrugated pipe)', '规格 Specification': '内径600mm,6米/根,SN8 Inner diameter 600mm, 6 meters per piece, SN8', '单位 Unit': '米m', '数量 Quantity': '180', '单价(元) Unit Price (CNY)': '106.00', '总额(元) Total Amount (CNY)': '1080.00', '几郎单价(元) Unit Price (GNF)': '16.21', '几郎总额(元) Total Amount (GNF)': '22118.38', '品牌 Brand': '鹏洲PZ', '计划来源 Planned Source': 'SMB268-GNHY-0021-WJ-20250108'}]))
980
 
@@ -992,5 +994,4 @@ interface = gr.Interface(
992
  theme=Base()
993
  )
994
 
995
- interface.launch()
996
-
 
920
  # Step 1: Extract XML content from DOCX
921
  print("Extracting Docs data to XML...")
922
  xml_filename = os.path.splitext(os.path.basename(docx_path))[0] + "_document.xml"
923
+ try:
924
+ xml_file = extract_docx_as_xml(docx_bytes, save_xml=False, xml_filename=xml_filename)
925
+ get_namespace(ET.fromstring(xml_file))
926
+ except (zipfile.BadZipFile, KeyError):
927
+ raise ValueError(f"Invalid file: {docx_path}")
928
 
929
  # Step 2: Extract tables from DOCX and save JSON
930
  print("Extracting XML data to JSON...")
 
976
  # Example Usage
977
 
978
  # extract_po("test-contract-converted.docx")
979
+ # extract_po("EPC简明合同格式-中英对照版.docx")
980
 
981
  # print(extract_price_list([{'序号 No.': '1', '名称 Name': 'PE波纹管(双壁波纹管) PE corrugated pipe (double wall corrugated pipe)', '规格 Specification': '内径600mm,6米/根,SN8 Inner diameter 600mm, 6 meters per piece, SN8', '单位 Unit': '米m', '数量 Quantity': '180', '单价(元) Unit Price (CNY)': '106.00', '总额(元) Total Amount (CNY)': '1080.00', '几郎单价(元) Unit Price (GNF)': '16.21', '几郎总额(元) Total Amount (GNF)': '22118.38', '品牌 Brand': '鹏洲PZ', '计划来源 Planned Source': 'SMB268-GNHY-0021-WJ-20250108'}]))
982
 
 
994
  theme=Base()
995
  )
996
 
997
+ interface.launch(show_error=True)