Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -985,14 +985,18 @@ Do not force map 名称(英文) to 单价
|
|
| 985 |
|
| 986 |
# Step 1: Handle name fields first - look for any field with "名称" or "name"
|
| 987 |
for header, value in row.items():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 988 |
# Clean the header for comparison
|
| 989 |
-
cleaned_header = re.sub(r'\s+', ' ', header).strip()
|
| 990 |
header_lower = cleaned_header.lower()
|
| 991 |
|
| 992 |
if ("名称" in header_lower or "name" in header_lower) and value:
|
| 993 |
# If field contains both Chinese and English, separate them
|
| 994 |
-
if re.search(r'[\u4e00-\u9fff]', value) and re.search(r'[a-zA-Z]', value):
|
| 995 |
-
chinese, english = separate_chinese_english(value)
|
| 996 |
if chinese:
|
| 997 |
new_row["名称"] = chinese
|
| 998 |
if english:
|
|
@@ -1000,21 +1004,29 @@ Do not force map 名称(英文) to 单价
|
|
| 1000 |
print(f"Separated: '{value}' → Chinese: '{chinese}', English: '{english}'")
|
| 1001 |
else:
|
| 1002 |
# Just set the name directly
|
| 1003 |
-
new_row["名称"] = value
|
| 1004 |
break # Stop after finding first name field
|
| 1005 |
|
| 1006 |
# Step 2: Fill in all other fields using standard mapping
|
| 1007 |
for header, value in row.items():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1008 |
# Skip empty values
|
| 1009 |
if not value:
|
| 1010 |
continue
|
| 1011 |
|
| 1012 |
# Clean the header for comparison
|
| 1013 |
-
cleaned_header = re.sub(r'\s+', ' ', header).strip()
|
| 1014 |
|
| 1015 |
# Check if this maps to a standard field
|
| 1016 |
matched_field = None
|
| 1017 |
for std_field, mapped_header in standard_field_mapping.items():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1018 |
# Make comparison more flexible by lowercasing and stripping spaces
|
| 1019 |
if mapped_header.lower().strip() == cleaned_header.lower().strip():
|
| 1020 |
matched_field = std_field
|
|
@@ -1023,13 +1035,13 @@ Do not force map 名称(英文) to 单价
|
|
| 1023 |
# If we found a mapping, use it (but don't overwrite name fields)
|
| 1024 |
if matched_field:
|
| 1025 |
if matched_field not in ["名称", "名称(英文)"] or not new_row[matched_field]:
|
| 1026 |
-
new_row[matched_field] = value
|
| 1027 |
# If no mapping found, add to other_fields
|
| 1028 |
else:
|
| 1029 |
# Skip name fields we already processed
|
| 1030 |
header_lower = cleaned_header.lower()
|
| 1031 |
if not ("名称" in header_lower or "name" in header_lower):
|
| 1032 |
-
other_fields[header] = value
|
| 1033 |
|
| 1034 |
# Add remaining fields to "其他"
|
| 1035 |
if other_fields:
|
|
@@ -1167,7 +1179,7 @@ def extract_po(docx_path):
|
|
| 1167 |
|
| 1168 |
# Example Usage
|
| 1169 |
|
| 1170 |
-
# extract_po("GN-
|
| 1171 |
# extract_po("EPC简明合同格式-中英对照版.docx")
|
| 1172 |
|
| 1173 |
# print(extract_price_list([{'序号 No.': '1', '名称 Name': 'PE波纹管(双壁波纹管) PE corrugated pipe (double wall corrugated pipe)', '规格 Specification': '内径600mm,6米/根,SN8 Inner diameter 600mm, 6 meters per piece, SN8', '单位 Unit': '米m', '数量 Quantity': '180', '单价(元) Unit Price (CNY)': '106.00', '总额(元) Total Amount (CNY)': '1080.00', '几郎单价(元) Unit Price (GNF)': '16.21', '几郎总额(元) Total Amount (GNF)': '22118.38', '品牌 Brand': '鹏洲PZ', '计划来源 Planned Source': 'SMB268-GNHY-0021-WJ-20250108'}]))
|
|
@@ -1193,7 +1205,4 @@ interface = gr.Interface(
|
|
| 1193 |
theme=Base()
|
| 1194 |
)
|
| 1195 |
|
| 1196 |
-
interface.launch(show_error=True)
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
|
|
|
| 985 |
|
| 986 |
# Step 1: Handle name fields first - look for any field with "名称" or "name"
|
| 987 |
for header, value in row.items():
|
| 988 |
+
# Skip if header is None
|
| 989 |
+
if header is None:
|
| 990 |
+
continue
|
| 991 |
+
|
| 992 |
# Clean the header for comparison
|
| 993 |
+
cleaned_header = re.sub(r'\s+', ' ', str(header)).strip()
|
| 994 |
header_lower = cleaned_header.lower()
|
| 995 |
|
| 996 |
if ("名称" in header_lower or "name" in header_lower) and value:
|
| 997 |
# If field contains both Chinese and English, separate them
|
| 998 |
+
if re.search(r'[\u4e00-\u9fff]', str(value)) and re.search(r'[a-zA-Z]', str(value)):
|
| 999 |
+
chinese, english = separate_chinese_english(str(value))
|
| 1000 |
if chinese:
|
| 1001 |
new_row["名称"] = chinese
|
| 1002 |
if english:
|
|
|
|
| 1004 |
print(f"Separated: '{value}' → Chinese: '{chinese}', English: '{english}'")
|
| 1005 |
else:
|
| 1006 |
# Just set the name directly
|
| 1007 |
+
new_row["名称"] = str(value)
|
| 1008 |
break # Stop after finding first name field
|
| 1009 |
|
| 1010 |
# Step 2: Fill in all other fields using standard mapping
|
| 1011 |
for header, value in row.items():
|
| 1012 |
+
# Skip if header is None
|
| 1013 |
+
if header is None:
|
| 1014 |
+
continue
|
| 1015 |
+
|
| 1016 |
# Skip empty values
|
| 1017 |
if not value:
|
| 1018 |
continue
|
| 1019 |
|
| 1020 |
# Clean the header for comparison
|
| 1021 |
+
cleaned_header = re.sub(r'\s+', ' ', str(header)).strip()
|
| 1022 |
|
| 1023 |
# Check if this maps to a standard field
|
| 1024 |
matched_field = None
|
| 1025 |
for std_field, mapped_header in standard_field_mapping.items():
|
| 1026 |
+
# Skip if mapped_header is None
|
| 1027 |
+
if mapped_header is None:
|
| 1028 |
+
continue
|
| 1029 |
+
|
| 1030 |
# Make comparison more flexible by lowercasing and stripping spaces
|
| 1031 |
if mapped_header.lower().strip() == cleaned_header.lower().strip():
|
| 1032 |
matched_field = std_field
|
|
|
|
| 1035 |
# If we found a mapping, use it (but don't overwrite name fields)
|
| 1036 |
if matched_field:
|
| 1037 |
if matched_field not in ["名称", "名称(英文)"] or not new_row[matched_field]:
|
| 1038 |
+
new_row[matched_field] = str(value)
|
| 1039 |
# If no mapping found, add to other_fields
|
| 1040 |
else:
|
| 1041 |
# Skip name fields we already processed
|
| 1042 |
header_lower = cleaned_header.lower()
|
| 1043 |
if not ("名称" in header_lower or "name" in header_lower):
|
| 1044 |
+
other_fields[header] = str(value)
|
| 1045 |
|
| 1046 |
# Add remaining fields to "其他"
|
| 1047 |
if other_fields:
|
|
|
|
| 1179 |
|
| 1180 |
# Example Usage
|
| 1181 |
|
| 1182 |
+
# extract_po("test-contracts\GN-SMB268202501-042WJ SMB268波纹管采购合同-东营顺航.docx")
|
| 1183 |
# extract_po("EPC简明合同格式-中英对照版.docx")
|
| 1184 |
|
| 1185 |
# print(extract_price_list([{'序号 No.': '1', '名称 Name': 'PE波纹管(双壁波纹管) PE corrugated pipe (double wall corrugated pipe)', '规格 Specification': '内径600mm,6米/根,SN8 Inner diameter 600mm, 6 meters per piece, SN8', '单位 Unit': '米m', '数量 Quantity': '180', '单价(元) Unit Price (CNY)': '106.00', '总额(元) Total Amount (CNY)': '1080.00', '几郎单价(元) Unit Price (GNF)': '16.21', '几郎总额(元) Total Amount (GNF)': '22118.38', '品牌 Brand': '鹏洲PZ', '计划来源 Planned Source': 'SMB268-GNHY-0021-WJ-20250108'}]))
|
|
|
|
| 1205 |
theme=Base()
|
| 1206 |
)
|
| 1207 |
|
| 1208 |
+
interface.launch(show_error=True)
|
|
|
|
|
|
|
|
|