| import xml.etree.ElementTree as ET
|
| from xml.dom import minidom
|
| import re
|
|
|
def create_specific_translation_dict():
    """Build the Chinese-to-English glossary used to translate the GEXF file.

    The entries are hand-written for the labels that occur in one specific
    geological knowledge-graph file; the keys must match those labels exactly
    because lookups are whole-string matches.

    Returns:
        dict[str, str]: a new dictionary mapping each Chinese term to its
        English rendering.
    """
    translation_dict = {
        # Rock types / lithologies
        "粉砂岩": "Siltstone",
        "灰岩": "Limestone",
        "板岩": "Slate",
        "砂岩": "Sandstone",
        "火山岩": "Volcanic rock",
        "二云石英片岩": "Two-mica quartz schist",
        "岩石": "Rock",
        "硅质岩": "Chert",
        "黑云斜长片麻岩": "Biotite plagioclase gneiss",
        "长石石英砂岩": "Feldspathic quartz sandstone",
        "长石砂岩": "Feldspathic sandstone",
        "岩屑砂岩": "Lithic sandstone",
        "粉砂质板岩": "Silty slate",
        "泥晶灰岩": "Micritic limestone",
        "碎屑岩": "Clastic rock",
        "碳酸盐岩": "Carbonate rock",
        "俄巴达动灰岩": "Obadong Limestone",
        "安山岩": "Andesite",
        "安山玄武岩": "Andesitic basalt",
        "达龙砂岩": "Dalong Sandstone",
        "英安岩": "Dacite",

        # Stratigraphic units, named rock bodies and related entities
        "九十道班组": "Jiushidaoban Formation",
        "开心岭群": "Kaixinling Group",
        "结隆组": "Jielong Formation",
        "多彩蛇绿混杂岩": "Duocai Ophiolite Complex",
        "通天河蛇绿混杂岩": "Tongtianhe Ophiolite Complex",
        "波里拉组": "Bolila Formation",
        "诺日巴尕日保组": "Nuoribagaribao Formation",
        "含煤碎屑岩组": "Coal-bearing clastic formation",
        "结扎群": "Jiezha Group",
        "尕笛考组": "Gadikao Formation",
        "甲丕拉组": "Jiapila Formation",
        "巴颜喀拉山群": "Bayankala Group",
        "灰色灰绿色厚层中-细粒岩屑长石砂岩": "Gray to grayish-green thick-bedded medium- to fine-grained lithic arkose",
        "碎屑岩组": "Clastic formation",
        "查涌蛇绿混杂岩": "Chayong Ophiolite Complex",
        "杂多群": "Zaduo Group",
        "测区南部岩石": "Southern survey area rocks",
        "岩石组合": "Rock association",
        "杂多群碎屑岩组": "Zaduo Group clastic formation",
        "巴塘群": "Batang Group",
        "古中元古代宁多岩群": "Paleo-Mesoproterozoic Ningduo Group",
        "格仁火山岩": "Geren Volcanics",
        "晚三叠世巴塘群及结扎群": "Late Triassic Batang Group and Jiezha Group",
        "上段": "Upper member",
        "火山岩组": "Volcanic formation",
        "晚三叠世巴塘群": "Late Triassic Batang Group",
        "西金乌兰-金沙江地层分区": "Xijinwulan-Jinsha River stratigraphic division",
        "该套地层": "This stratigraphic sequence",
        "新近纪曲果组": "Neogene Quguo Formation",
        "相似地层": "Correlative strata",
        "含矽线红柱石黑云母斜长片麻岩": "Sillimanite-andalusite-bearing biotite-plagioclase gneiss",
        "沱沱河组": "Tuotuohe Formation",
        "砂岩组(2-3B)": "Sandstone formation (2-3B)",
        "晚三叠世结扎群": "Late Triassic Jiezha Group",
        "巴贡组": "Bagong Formation",
        "测区结扎群": "Survey area Jiezha Group",
        "宁多群": "Ningduo Group",
        "青海省第二区调队": "Qinghai Province Second Geological Survey Team",
        "三叠纪查涌蛇绿混杂岩": "Triassic Chayong Ophiolite Complex",
        "早中二叠世尕笛考组": "Early-Middle Permian Gadikao Formation",
        "晚二叠世-早三叠世火山岩组": "Late Permian-Early Triassic volcanic formation",
        "当江荣火山岩": "Dangjianglong Volcanics",

        # Minerals and rock components
        "斑晶": "Phenocryst",
        "斜长石": "Plagioclase",
        "石英": "Quartz",
        "黑云母": "Biotite",
        "黄铁矿晶体": "Pyrite crystals",
        "砾石": "Gravel",

        # Fossil groups
        "双壳类": "Bivalvia",
        "腕足类": "Brachiopoda",
        "头足类": "Cephalopoda",

        # Locations
        "测区": "Study area",
        "青海省": "Qinghai Province",

        # Geological ages
        "晚三叠世": "Late Triassic",
        "早中二叠世": "Early-Middle Permian",
        "三叠纪": "Triassic",
        "早石炭世": "Early Carboniferous",
        "二叠纪": "Permian",

        # Colors
        "灰绿色": "Grayish green",

        # Structures and textures
        "片状构造": "Foliated structure",
        "斑状结构": "Porphyritic texture",
        "水平层理": "Horizontal bedding",

        # Relationship and attribute labels
        "岩性": "Lithology",
        "出露于": "Exposed",
        "出露地层": "Outcrop",
        "含有": "Contained",
        "古生物": "Paleontological",
        "属于": "Belongs to",

        "断层接触": "Fault contact",
        "包含": "Includes",
        "所属年代": "Age",
        "发育": "Develop",
        "角度不整合": "Angular unconformity",
        "命名": "Named by",
        "创名": "Established by",
        "结构": "Structure",
        "厚度": "Thickness",

        "分布形态": "Distribution pattern",
        "坐标": "Coordinates",
        # "整合接触" is a conformable (uninterrupted) stratigraphic contact,
        # not an "integration".
        "整合接触": "Conformable contact",
        "不整合接触": "Unconformity contact",
        "海拔": "Elevation",
        "位于": "Located at",
        "面积": "Exposed area",
        "大地构造位置": "Geotectonic position",
        "地层区划": "Stratigraphical zoning",
        # "假整合" is a disconformity (parallel unconformity); it has nothing
        # to do with consolidation.
        "假整合接触": "Disconformable contact",
        "行政区划": "Administrative area",
        "吞噬": "Engulfed",
        "长度": "Length",
        # Igneous bodies "intrude" their host rocks.
        "侵入": "Intrude",

        # Importance levels
        "高": "High",
        "中": "Medium",
        "低": "Low",
    }

    return translation_dict
|
|
|
def translate_geological_term_specific(chinese_term, translation_dict):
    """Look up the English rendering of a single geological term.

    The match is on the whole string only; no partial or fuzzy matching is
    attempted.

    Args:
        chinese_term: the term to translate.
        translation_dict: mapping from Chinese terms to English terms.

    Returns:
        The mapped English term, or ``chinese_term`` itself when the mapping
        has no entry for it.
    """
    # Falling back to the input lets callers detect "no translation" by
    # comparing the result with what they passed in.
    return translation_dict.get(chinese_term, chinese_term)
|
|
|
def _translate_xml_attribute(element, attr_name, translation_dict, untranslated_terms=None):
    """Translate one attribute of an XML element in place.

    Args:
        element: the ElementTree element to update.
        attr_name: name of the attribute holding the Chinese term.
        translation_dict: mapping from Chinese terms to English terms.
        untranslated_terms: optional set; non-empty values that the
            dictionary leaves unchanged are added to it.

    Returns:
        ``(original, translated)`` when the attribute was rewritten,
        otherwise ``None`` (attribute missing/empty or left unchanged).
    """
    original = element.get(attr_name, '')
    if not original:
        return None

    translated = translate_geological_term_specific(original, translation_dict)
    if translated == original:
        if untranslated_terms is not None:
            untranslated_terms.add(original)
        return None

    element.set(attr_name, translated)
    return original, translated


def translate_specific_gexf_file(input_file, output_file=None):
    """Translate the Chinese geological terms in a GEXF file into English.

    Node labels, edge labels, edge attvalues whose ``for`` is ``label`` or
    ``relationship``, and every attvalue whose ``for`` is ``importance`` are
    looked up in the glossary and rewritten when an entry exists. The meta
    ``description`` and ``creator`` elements, when present, are replaced with
    fixed English text. Terms with no glossary entry are kept as-is, printed,
    and written to a ``*_untranslated_terms.txt`` file next to the output.

    Args:
        input_file: path of the GEXF file to read.
        output_file: path of the GEXF file to write. When ``None``,
            ``_english`` is inserted before the input file's extension.

    Returns:
        The output path on success, or ``None`` if any step raised (the
        error and traceback are printed).
    """
    # Function-scope import: this module only imports os locally elsewhere.
    import os

    if output_file is None:
        # splitext only touches the final extension, so directory names that
        # contain ".gexf" are left alone and an input without that extension
        # can no longer resolve to the input path itself.
        stem, ext = os.path.splitext(input_file)
        output_file = f"{stem}_english{ext or '.gexf'}"

    translation_dict = create_specific_translation_dict()

    try:
        tree = ET.parse(input_file)
        root = tree.getroot()

        # Take the namespace from the document root instead of hard-coding
        # the 1.2draft URI, so other GEXF versions are matched as well.
        ns_uri = root.tag[1:].partition('}')[0] if root.tag.startswith('{') else ''
        ns = f"{{{ns_uri}}}" if ns_uri else ''
        if ns_uri:
            # Serialise the document namespace as the default namespace
            # rather than an auto-generated "ns0:" prefix, so consumers that
            # match on unprefixed tag names still recognise the output.
            # NOTE: register_namespace updates a process-wide registry.
            ET.register_namespace('', ns_uri)
            # Presumably the viz extension lives at "<namespace>/viz", as it
            # does for 1.2draft; harmless if the document does not use it.
            ET.register_namespace('viz', f"{ns_uri}/viz")

        translated_nodes = 0
        translated_edges = 0
        untranslated_terms = set()

        print(f"开始处理GEXF文件: {input_file}")

        # --- Node labels ---
        nodes = root.findall(f'.//{ns}node')
        print(f"找到 {len(nodes)} 个节点")

        for node in nodes:
            change = _translate_xml_attribute(
                node, 'label', translation_dict, untranslated_terms)
            if change:
                translated_nodes += 1
                print(f"节点翻译: {change[0]} -> {change[1]}")

        # --- Edge labels and relationship attvalues ---
        edges = root.findall(f'.//{ns}edge')
        print(f"找到 {len(edges)} 条边")

        for edge in edges:
            edge_changed = False

            change = _translate_xml_attribute(
                edge, 'label', translation_dict, untranslated_terms)
            if change:
                edge_changed = True
                print(f"边标签翻译: {change[0]} -> {change[1]}")

            # NOTE(review): in GEXF the attvalue "for" normally references an
            # attribute id; this only matches files whose ids are literally
            # "label"/"relationship" — confirm against the input file.
            for attvalue in edge.findall(f'.//{ns}attvalue'):
                if attvalue.get('for', '') not in ('label', 'relationship'):
                    continue
                if _translate_xml_attribute(
                        attvalue, 'value', translation_dict, untranslated_terms):
                    edge_changed = True

            # Count each edge once, whether it was translated through its
            # label attribute, an attvalue, or both.
            if edge_changed:
                translated_edges += 1

        # --- Importance levels (anywhere in the document) ---
        importance_attrs = root.findall(f'.//{ns}attvalue[@for="importance"]')
        print(f"找到 {len(importance_attrs)} 个重要性属性")

        for attvalue in importance_attrs:
            # Unmatched importance values are deliberately not reported as
            # untranslated terms.
            change = _translate_xml_attribute(attvalue, 'value', translation_dict)
            if change:
                print(f"重要性翻译: {change[0]} -> {change[1]}")

        # --- Metadata ---
        meta = root.find(f'.//{ns}meta')
        if meta is not None:
            description = meta.find(f'.//{ns}description')
            if description is not None:
                description.text = "Geological Knowledge Graph - English Translation"

            creator = meta.find(f'.//{ns}creator')
            if creator is not None:
                creator.text = "Geological Knowledge Graph Translator"

        # --- Serialise with indentation ---
        rough_string = ET.tostring(root, encoding='unicode')
        reparsed = minidom.parseString(rough_string)
        pretty_xml = reparsed.toprettyxml(indent=" ", encoding=None)

        # toprettyxml keeps the input's whitespace-only text nodes, which
        # show up as blank lines; drop them.
        pretty_xml = '\n'.join(
            line for line in pretty_xml.split('\n') if line.strip())

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(pretty_xml)

        # --- Report ---
        print(f"\nGEXF英文翻译完成: {output_file}")
        print(f"翻译的节点数: {translated_nodes}")
        print(f"翻译的边关系数: {translated_edges}")
        print(f"翻译词典包含术语: {len(translation_dict)} 个")

        if untranslated_terms:
            remaining = sorted(untranslated_terms)
            print(f"\n保持原文的术语 ({len(remaining)} 个):")
            for term in remaining:
                print(f" - {term}")

            # Derive the report name from the output stem so it can never
            # resolve to (and overwrite) the GEXF file that was just written.
            untranslated_file = (
                os.path.splitext(output_file)[0] + '_untranslated_terms.txt')
            with open(untranslated_file, 'w', encoding='utf-8') as f:
                f.write("未翻译的地质术语列表:\n")
                f.write("=" * 50 + "\n")
                f.writelines(f"{term}\n" for term in remaining)
            print(f"未翻译术语已保存到: {untranslated_file}")
        else:
            print("所有术语均已成功翻译!")

        print("\n翻译特点:")
        print("- 专门针对此GEXF文件中的术语进行翻译")
        print("- 保持了地质术语的专业性和准确性")
        print("- 纯英文输出,适合国际学术交流")
        print("- 保留了原有的网络结构和可视化属性")

        return output_file

    except Exception as e:
        # Top-level boundary: callers rely on a None return rather than an
        # exception, so report the failure with its traceback and return.
        print(f"翻译GEXF文件时出错: {e}")
        import traceback
        traceback.print_exc()
        return None
|
|
|
|
|
def main():
    """Translate ``knowledge_graph.gexf`` from the current directory.

    Writes the result to ``knowledge_graph_english.gexf`` and prints progress
    to stdout. Returns ``None`` in every case; when the input file is missing
    an error is printed and nothing is translated.
    """
    import os

    input_file = 'knowledge_graph.gexf'

    print("开始翻译地质知识图谱GEXF文件...")
    print("=" * 60)

    # Fail early with a clear message instead of a parser traceback.
    if not os.path.exists(input_file):
        print(f"错误:找不到输入文件 {input_file}")
        print("请确保 knowledge_graph.gexf 文件在当前目录中")
        return

    english_file = translate_specific_gexf_file(input_file, 'knowledge_graph_english.gexf')

    print("\n" + "=" * 60)
    # The translator returns None on failure; only announce completion when
    # an output file was actually produced.
    if english_file:
        print("翻译任务完成!")
        print(f"输出文件: {english_file}")
    else:
        print("翻译任务失败,未生成输出文件")


if __name__ == "__main__":
    main()