Update Tagger.py
Browse files
Tagger.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# This file tags the major text
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
import re
|
|
@@ -9,6 +8,7 @@ import nltk
|
|
| 9 |
nltk.download('punkt')
|
| 10 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 11 |
|
|
|
|
| 12 |
model = SentenceTransformer('all-mpnet-base-v2')
|
| 13 |
|
| 14 |
def get_paragraphed_text(folderpath):
|
|
@@ -86,4 +86,4 @@ def process_file(folderpath, draw=False):
|
|
| 86 |
paras_text = get_paragraphed_text(folderpath)
|
| 87 |
majority_author_sent, majority_author_para_ind = get_majority_author_sentence(paras_text)
|
| 88 |
if draw:
|
| 89 |
-
draw_line_above_sent(folderpath, majority_author_sent, majority_author_para_ind)
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import numpy as np
|
| 3 |
import re
|
|
|
|
| 8 |
nltk.download('punkt')
|
| 9 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 10 |
|
| 11 |
+
|
| 12 |
model = SentenceTransformer('all-mpnet-base-v2')
|
| 13 |
|
| 14 |
def get_paragraphed_text(folderpath):
|
|
|
|
| 86 |
paras_text = get_paragraphed_text(folderpath)
|
| 87 |
majority_author_sent, majority_author_para_ind = get_majority_author_sentence(paras_text)
|
| 88 |
if draw:
|
| 89 |
+
draw_line_above_sent(folderpath, majority_author_sent, majority_author_para_ind)
|