"""Split a plain-text file into period-terminated sentences and save them as a CSV."""

import re

import pandas as pd


filename = '/Users/steve/sun_also_rises.txt'
output_filename = '/Users/steve/sunalsorises.csv'

# Compiled once; applied to every input line to collapse runs of spaces.
_SPACE_RUN = re.compile(' +')


def extract_sentences(lines, *, verbose=False):
    """Join hard-wrapped lines of text into a list of sentences.

    A "sentence" here is simply the text up to and including a ``'.'``;
    no other terminator (``?``, ``!``) is recognised. Text that has not yet
    reached a period is carried over to the following line(s), separated by
    a single space.

    Args:
        lines: Iterable of text lines, with or without trailing newlines.
        verbose: When true, print one diagnostic line per input line.

    Returns:
        A list of sentences, each with leading whitespace removed. Text
        left over after the last period is returned as a final entry
        (without a period appended) instead of being discarded.
    """
    sentences = []
    fragment = ''  # text seen since the last period, awaiting its terminator

    for i, raw in enumerate(lines):
        line = _SPACE_RUN.sub(' ', raw).replace('\n', '')
        # Empty pieces come from a trailing period or consecutive periods.
        pieces = [piece for piece in line.split('.') if piece]
        if verbose:
            print(f"\n line {i}: {line}, len(line) = {len(line)}, numsent = {len(pieces)}")

        # Blank lines reduce to '' or ' '. The length test also skips
        # any other one-character line.
        if len(line) <= 1 or not pieces:
            continue

        # If the line ends with a period, every piece is a finished sentence;
        # otherwise the last piece is the start of a sentence that continues
        # on a later line.
        ends_sentence = line.endswith('.')
        complete = pieces if ends_sentence else pieces[:-1]
        for piece in complete:
            sentences.append((fragment + piece + '.').lstrip())
            fragment = ''
        if not ends_sentence:
            fragment += pieces[-1] + ' '

    # Keep any text that never reached a closing period.
    leftover = fragment.strip()
    if leftover:
        sentences.append(leftover)
    return sentences


def main():
    """Read ``filename``, split it into sentences and write ``output_filename``."""
    # The context manager closes the file; no explicit close() is needed.
    with open(filename, "r") as myfile:
        lines = myfile.readlines()

    df = pd.DataFrame({'sentence': extract_sentences(lines, verbose=True)})
    df.to_csv(output_filename, index=False)


if __name__ == "__main__":
    main()