Update textsegmentation.py
Browse files - textsegmentation.py +4 -17
textsegmentation.py
CHANGED
|
@@ -3,29 +3,16 @@ def textsegmentation():
|
|
| 3 |
with open(contract_file_path, 'r') as file:
|
| 4 |
contract_text = file.read()
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
# Remove leading and trailing whitespace from each paragraph
|
| 10 |
-
paragraphs = [paragraph.strip() for paragraph in paragraphs]
|
| 11 |
-
|
| 12 |
-
# Remove line breaks within each paragraph
|
| 13 |
-
paragraphs = [re.sub(r'\s+', ' ', paragraph) for paragraph in paragraphs]
|
| 14 |
-
new_sentences.append(paragraphs)
|
| 15 |
-
|
| 16 |
-
# Print the extracted clauses
|
| 17 |
-
for i, clause in enumerate(paragraphs):
|
| 18 |
-
print(f"Segment {i+1}: {clause}\n")
|
| 19 |
|
| 20 |
# Prepare data for CSV
|
| 21 |
-
|
| 22 |
-
data = [(i+1, paragraph) for i, paragraph in enumerate(paragraphs)]
|
| 23 |
|
| 24 |
# Write the data to CSV file
|
| 25 |
with open(output_csv_file, 'w', newline='', encoding='utf-8') as file:
|
| 26 |
writer = csv.writer(file)
|
| 27 |
-
writer.writerow(['
|
| 28 |
writer.writerows(data)
|
| 29 |
|
| 30 |
-
|
| 31 |
print("Output saved to CSV file.")
|
|
|
|
| 3 |
with open(contract_file_path, 'r') as file:
|
| 4 |
contract_text = file.read()
|
| 5 |
|
| 6 |
+
# Tokenize the contract text into sentences
|
| 7 |
+
sentences = nltk.sent_tokenize(contract_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Prepare data for CSV
|
| 10 |
+
data = [(i+1, sentence) for i, sentence in enumerate(sentences)]
|
|
|
|
| 11 |
|
| 12 |
# Write the data to CSV file
|
| 13 |
with open(output_csv_file, 'w', newline='', encoding='utf-8') as file:
|
| 14 |
writer = csv.writer(file)
|
| 15 |
+
writer.writerow(['Sentence ID', 'Sentence Text']) # Write header
|
| 16 |
writer.writerows(data)
|
| 17 |
|
|
|
|
| 18 |
print("Output saved to CSV file.")
|