File size: 1,138 Bytes
3ca3932 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | #!/usr/bin/env python3
"""Parse sentences from sentences_200.txt using underthesea dependency_parse."""
import sys

from underthesea import dependency_parse
# Read every record up front. Each non-blank line is expected to be a
# tab-separated record of the form "ID\tfiction\tsentence".
with open("sentences_200.txt", "r", encoding="utf-8") as f:
    lines = f.readlines()

# Parse each sentence. `results` receives one
# (sentence_id, sentence_text, parse_result) tuple per well-formed record;
# parse_result is None when parsing that record raised an exception.
results = []
failed_count = 0   # well-formed records whose parse raised
skipped_count = 0  # non-blank lines with fewer than three fields
for line_no, line in enumerate(lines, start=1):
    line = line.strip()
    if not line:
        continue

    # Split by tab - format is: "ID\tfiction\tsentence"
    parts = line.split("\t")
    if len(parts) < 3:
        # Report malformed records instead of dropping them silently, so a
        # broken input file is noticed. Diagnostics go to stderr to keep the
        # parse output on stdout unchanged.
        skipped_count += 1
        print(
            f"Skipping line {line_no}: expected at least 3 tab-separated "
            f"fields, got {len(parts)}",
            file=sys.stderr,
        )
        continue

    sentence_id = parts[0].strip()
    sentence_text = parts[2].strip()  # the middle column is not used here

    print(f"\n{'='*60}")
    print(f"Sentence {sentence_id}:")
    print(f"Text: {sentence_text}")
    print(f"{'='*60}")

    try:
        parse_result = dependency_parse(sentence_text)
        print("Dependency Parse:")
        for token in parse_result:
            print(f" {token}")
        results.append((sentence_id, sentence_text, parse_result))
    except Exception as e:
        # Deliberately best-effort: one sentence that cannot be parsed must
        # not abort the whole run. Record it with a None result and move on.
        print(f"Error parsing: {e}")
        results.append((sentence_id, sentence_text, None))
        failed_count += 1

# Count only the sentences that actually produced a parse; failures and
# skipped lines are reported separately rather than inflating the total.
print(f"\n\nTotal sentences parsed: {len(results) - failed_count}")
if failed_count:
    print(f"Sentences that failed to parse: {failed_count}")
if skipped_count:
    print(f"Malformed lines skipped: {skipped_count}", file=sys.stderr)
|