File size: 1,138 Bytes
3ca3932
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python3
"""Parse sentences from sentences_200.txt using underthesea dependency_parse."""

from underthesea import dependency_parse

# Parse every well-formed line of sentences_200.txt, print each result as it
# is produced, and collect (sentence_id, sentence_text, parse_result) tuples
# in `results`. Expected line format: "ID\tfiction\tsentence".
# A sentence whose parse raised is stored with parse_result set to None.
results = []
failed_count = 0   # sentences for which dependency_parse raised
skipped_lines = 0  # non-blank lines with fewer than three tab-separated fields
separator = "=" * 60

# Iterate the file handle directly so lines are streamed instead of being
# loaded into a list that is only walked once.
with open("sentences_200.txt", "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue

        # Split by tab - only the first (ID) and third (sentence) fields are used.
        parts = line.split("\t")
        if len(parts) < 3:
            # Malformed line: count it so the summary reports it rather than
            # dropping it silently.
            skipped_lines += 1
            continue

        sentence_id = parts[0].strip()
        sentence_text = parts[2].strip()

        print(f"\n{separator}")
        print(f"Sentence {sentence_id}:")
        print(f"Text: {sentence_text}")
        print(separator)

        try:
            parse_result = dependency_parse(sentence_text)
            print("Dependency Parse:")
            for token in parse_result:
                print(f"  {token}")
            results.append((sentence_id, sentence_text, parse_result))
        except Exception as e:
            # Deliberate best-effort boundary: one bad sentence must not abort
            # the whole run. Record the failure and move on to the next line.
            print(f"Error parsing: {e}")
            results.append((sentence_id, sentence_text, None))
            failed_count += 1

# Report only successful parses as "parsed"; failures and malformed lines are
# listed separately (and only when present, so a clean run prints one line).
print(f"\n\nTotal sentences parsed: {len(results) - failed_count}")
if failed_count:
    print(f"Sentences that failed to parse: {failed_count}")
if skipped_lines:
    print(f"Malformed lines skipped: {skipped_lines}")