Spaces:
Runtime error
Runtime error
updated email extractor
Browse files
app.py
CHANGED
|
@@ -135,6 +135,31 @@ def email_extractor(email_uploaded):
|
|
| 135 |
|
| 136 |
return email_body, character_cnt, url_cnt
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
# extract email body from parse email
|
| 140 |
def email_body_extractor(email_data):
|
|
@@ -363,7 +388,7 @@ if st.session_state.get('button') == True:
|
|
| 363 |
#uploaded_file = FileChooser(uploaded_file)
|
| 364 |
#bytes_data = uploaded_file.getvalue()
|
| 365 |
|
| 366 |
-
email_body, character_cnt, url_cnt =
|
| 367 |
|
| 368 |
# Start the prediction
|
| 369 |
# Need to solve X test issue
|
|
|
|
| 135 |
|
| 136 |
return email_body, character_cnt, url_cnt
|
| 137 |
|
| 138 |
+
def email_extractor_general(email_uploaded):
    """Return the cleaned email text, its character count and its URL count.

    The upload is passed to ``parse_email`` and the resulting pieces are
    joined into a single string, from which line breaks, tabs, carriage
    returns, ``<b>``/``</b>`` tags and non-breaking spaces are removed.

    Args:
        email_uploaded: The uploaded email, forwarded unchanged to
            ``parse_email``.

    Returns:
        A tuple ``(email_text, character_cnt, url_cnt)`` where
        ``email_text`` is the cleaned text (URLs and any '©' footer are
        still present in it), ``character_cnt`` is the number of
        non-whitespace characters left once scheme-prefixed URLs and
        everything from the first '©' onwards are dropped, and ``url_cnt``
        is the number of URLs ``URLExtract`` finds in the cleaned text.
    """
    email_text = ''.join(parse_email(email_uploaded)).strip()

    # Strip non-text elements one at a time; the order is kept as-is because
    # removing one token can bring the pieces of a later one together.
    for token in ('\n', '\t', '\r', '</b>', '<b>', '\xa0'):
        email_text = email_text.replace(token, '')

    # Count the URLs present in the cleaned text.
    url_finder = URLExtract()
    url_cnt = len(url_finder.find_urls(email_text))

    # Drop scheme-prefixed URLs, then ignore everything from the first
    # copyright sign onwards before counting characters.
    without_urls = re.sub(
        r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', email_text
    )
    counted_part = without_urls.partition('©')[0]
    character_cnt = sum(1 for symbol in counted_part if not symbol.isspace())

    return email_text, character_cnt, url_cnt
|
| 162 |
+
|
| 163 |
|
| 164 |
# extract email body from parse email
|
| 165 |
def email_body_extractor(email_data):
|
|
|
|
| 388 |
#uploaded_file = FileChooser(uploaded_file)
|
| 389 |
#bytes_data = uploaded_file.getvalue()
|
| 390 |
|
| 391 |
+
email_body, character_cnt, url_cnt = email_extractor_general(uploaded_file)
|
| 392 |
|
| 393 |
# Start the prediction
|
| 394 |
# Need to solve X test issue
|