#!/usr/bin/env python3
"""Parse sentences from sentences_200.txt using underthesea dependency_parse."""
from underthesea import dependency_parse


def main() -> None:
    """Read tab-separated sentences and print a dependency parse for each.

    Input file format is one record per line: "ID\\tfiction\\tsentence"
    (three or more tab-separated fields; field 1 is the ID, field 3 the
    sentence text). Blank lines are skipped; malformed lines are reported.
    Prints a banner and the token-level parse for every sentence, then a
    final count of processed sentences.
    """
    results: list[tuple[str, str, object]] = []
    # Iterate the file lazily instead of readlines() — no need to hold
    # the whole file in memory.
    with open("sentences_200.txt", "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # skip blank lines
            # Expected format: "ID\tfiction\tsentence"
            parts = line.split("\t")
            if len(parts) < 3:
                # Report malformed lines instead of dropping them silently.
                print(f"Skipping malformed line: {line!r}")
                continue
            sentence_id = parts[0].strip()
            sentence_text = parts[2].strip()
            print(f"\n{'='*60}")
            print(f"Sentence {sentence_id}:")
            print(f"Text: {sentence_text}")
            print(f"{'='*60}")
            # Keep the try body minimal: only the call that can fail.
            # Broad Exception catch is deliberate — best-effort batch run
            # that records the failure and keeps going.
            try:
                parse_result = dependency_parse(sentence_text)
            except Exception as e:
                print(f"Error parsing: {e}")
                results.append((sentence_id, sentence_text, None))
            else:
                print("Dependency Parse:")
                for token in parse_result:
                    print(f"  {token}")
                results.append((sentence_id, sentence_text, parse_result))
    print(f"\n\nTotal sentences parsed: {len(results)}")


if __name__ == "__main__":
    main()