bamboo-1 / src /parse_sentences.py
rain1024's picture
Consolidate project: merge scripts/, bamboo1/ into src/, optimize training
24ec440
#!/usr/bin/env python3
"""Parse sentences from sentences_200.txt using underthesea dependency_parse."""
from underthesea import dependency_parse
# Load every raw line of the input file into memory; each entry keeps its
# trailing newline, which the parsing loop strips later.
with open("sentences_200.txt", mode="r", encoding="utf-8") as infile:
    lines = list(infile)
# Parse each sentence and echo the result to stdout.
#
# `results` collects one (sentence_id, sentence_text, parse_result) tuple per
# well-formed input line; parse_result is None when parsing raised.
results = []
failed_count = 0   # sentences for which dependency_parse raised
skipped_count = 0  # non-empty lines that did not have the expected 3 columns
separator = "=" * 60

for raw_line in lines:
    line = raw_line.strip()
    if not line:
        continue

    # Split by tab - format is: "ID\tfiction\tsentence".
    # Only the first and third columns are used.
    parts = line.split("\t")
    if len(parts) < 3:
        # Malformed record: remember it so the summary can report it instead
        # of dropping it without a trace.
        skipped_count += 1
        continue

    sentence_id = parts[0].strip()
    sentence_text = parts[2].strip()

    print(f"\n{separator}")
    print(f"Sentence {sentence_id}:")
    print(f"Text: {sentence_text}")
    print(f"{separator}")

    try:
        parse_result = dependency_parse(sentence_text)
        print("Dependency Parse:")
        for token in parse_result:
            print(f" {token}")
        results.append((sentence_id, sentence_text, parse_result))
    except Exception as e:
        # Deliberate best-effort: one bad sentence must not abort the whole
        # run, so record the failure and move on to the next line.
        print(f"Error parsing: {e}")
        results.append((sentence_id, sentence_text, None))
        failed_count += 1

# `results` also contains the failed entries, so subtract them to report the
# number of sentences that were actually parsed.
parsed_count = len(results) - failed_count
print(f"\n\nTotal sentences parsed: {parsed_count}")
if failed_count:
    print(f"Sentences that failed to parse: {failed_count}")
if skipped_count:
    print(f"Malformed lines skipped: {skipped_count}")