Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -920,9 +920,11 @@ def extract_po(docx_path):
|
|
| 920 |
# Step 1: Extract XML content from DOCX
|
| 921 |
print("Extracting Docs data to XML...")
|
| 922 |
xml_filename = os.path.splitext(os.path.basename(docx_path))[0] + "_document.xml"
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
|
|
|
|
|
|
| 926 |
|
| 927 |
# Step 2: Extract tables from DOCX and save JSON
|
| 928 |
print("Extracting XML data to JSON...")
|
|
@@ -974,7 +976,7 @@ def extract_po(docx_path):
|
|
| 974 |
# Example Usage
|
| 975 |
|
| 976 |
# extract_po("test-contract-converted.docx")
|
| 977 |
-
# extract_po("
|
| 978 |
|
| 979 |
# print(extract_price_list([{'序号 No.': '1', '名称 Name': 'PE波纹管(双壁波纹管) PE corrugated pipe (double wall corrugated pipe)', '规格 Specification': '内径600mm,6米/根,SN8 Inner diameter 600mm, 6 meters per piece, SN8', '单位 Unit': '米m', '数量 Quantity': '180', '单价(元) Unit Price (CNY)': '106.00', '总额(元) Total Amount (CNY)': '1080.00', '几郎单价(元) Unit Price (GNF)': '16.21', '几郎总额(元) Total Amount (GNF)': '22118.38', '品牌 Brand': '鹏洲PZ', '计划来源 Planned Source': 'SMB268-GNHY-0021-WJ-20250108'}]))
|
| 980 |
|
|
@@ -992,5 +994,4 @@ interface = gr.Interface(
|
|
| 992 |
theme=Base()
|
| 993 |
)
|
| 994 |
|
| 995 |
-
interface.launch()
|
| 996 |
-
|
|
|
|
| 920 |
# Step 1: Extract XML content from DOCX
|
| 921 |
print("Extracting Docs data to XML...")
|
| 922 |
xml_filename = os.path.splitext(os.path.basename(docx_path))[0] + "_document.xml"
|
| 923 |
+
try:
|
| 924 |
+
xml_file = extract_docx_as_xml(docx_bytes, save_xml=False, xml_filename=xml_filename)
|
| 925 |
+
get_namespace(ET.fromstring(xml_file))
|
| 926 |
+
except (zipfile.BadZipFile, KeyError):
|
| 927 |
+
raise ValueError(f"Invalid file: {docx_path}")
|
| 928 |
|
| 929 |
# Step 2: Extract tables from DOCX and save JSON
|
| 930 |
print("Extracting XML data to JSON...")
|
|
|
|
| 976 |
# Example Usage
|
| 977 |
|
| 978 |
# extract_po("test-contract-converted.docx")
|
| 979 |
+
# extract_po("EPC简明合同格式-中英对照版.docx")
|
| 980 |
|
| 981 |
# print(extract_price_list([{'序号 No.': '1', '名称 Name': 'PE波纹管(双壁波纹管) PE corrugated pipe (double wall corrugated pipe)', '规格 Specification': '内径600mm,6米/根,SN8 Inner diameter 600mm, 6 meters per piece, SN8', '单位 Unit': '米m', '数量 Quantity': '180', '单价(元) Unit Price (CNY)': '106.00', '总额(元) Total Amount (CNY)': '1080.00', '几郎单价(元) Unit Price (GNF)': '16.21', '几郎总额(元) Total Amount (GNF)': '22118.38', '品牌 Brand': '鹏洲PZ', '计划来源 Planned Source': 'SMB268-GNHY-0021-WJ-20250108'}]))
|
| 982 |
|
|
|
|
| 994 |
theme=Base()
|
| 995 |
)
|
| 996 |
|
| 997 |
+
interface.launch(show_error=True)
|
|
|