#!/usr/bin/env python3
"""Parse sentences from sentences_200.txt using underthesea dependency_parse.

Each non-blank input line is expected to be tab-separated in the form
``ID<TAB>fiction<TAB>sentence``. Lines with fewer than three fields are
skipped. Every remaining sentence is passed to ``dependency_parse``; the
tokens are printed and a ``(sentence_id, sentence_text, parse_result)``
tuple is collected, with ``parse_result`` set to ``None`` when parsing
raised an exception.
"""

from underthesea import dependency_parse

INPUT_PATH = "sentences_200.txt"
BANNER = "=" * 60


def _split_record(line):
    """Extract the ID and sentence from one stripped, tab-separated line.

    Args:
        line: A non-empty input line with surrounding whitespace removed.

    Returns:
        A ``(sentence_id, sentence_text)`` tuple built from the first and
        third fields, or ``None`` when the line has fewer than three fields.
    """
    parts = line.split("\t")
    if len(parts) < 3:
        return None
    return parts[0].strip(), parts[2].strip()


def _parse_and_report(sentence_id, sentence_text):
    """Print the sentence header, parse it, and print the resulting tokens.

    Args:
        sentence_id: Identifier taken from the first input field.
        sentence_text: Sentence taken from the third input field.

    Returns:
        Whatever ``dependency_parse`` returned, or ``None`` if it raised.
    """
    print(f"\n{BANNER}")
    print(f"Sentence {sentence_id}:")
    print(f"Text: {sentence_text}")
    print(BANNER)

    try:
        parse_result = dependency_parse(sentence_text)
    except Exception as e:
        # Deliberately broad: one bad sentence must not abort the whole
        # batch, so the failure is reported and recorded as None.
        print(f"Error parsing: {e}")
        return None

    print("Dependency Parse:")
    for token in parse_result:
        print(f"  {token}")
    return parse_result


def main(path=INPUT_PATH):
    """Parse every sentence in *path* and print a summary.

    Args:
        path: Input file to read; defaults to ``sentences_200.txt``.

    Returns:
        A list of ``(sentence_id, sentence_text, parse_result)`` tuples in
        input order; ``parse_result`` is ``None`` for sentences that failed.
    """
    results = []
    skipped = 0

    # Iterate the file object directly so lines are streamed rather than
    # all loaded into memory first.
    with open(path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            record = _split_record(line)
            if record is None:
                skipped += 1
                continue

            sentence_id, sentence_text = record
            parse_result = _parse_and_report(sentence_id, sentence_text)
            results.append((sentence_id, sentence_text, parse_result))

    print(f"\n\nTotal sentences parsed: {len(results)}")

    # The total above counts every sentence that was attempted; surface the
    # ones that did not actually produce a parse, and lines that were dropped.
    failed = sum(1 for _, _, parse_result in results if parse_result is None)
    if failed:
        print(f"Sentences that failed to parse: {failed}")
    if skipped:
        print(f"Lines skipped (fewer than 3 tab-separated fields): {skipped}")

    return results


if __name__ == "__main__":
    main()