Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,6 +13,9 @@ PRIOR_MESSAGE_MARKERS = [
|
|
| 13 |
re.compile(r'^On .* wrote:', re.IGNORECASE),
|
| 14 |
re.compile(r'^----\s?Original Message\s?----$', re.IGNORECASE),
|
| 15 |
re.compile(r'^Begin forwarded message:', re.IGNORECASE),
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Portuguese patterns
|
| 18 |
re.compile(r'^Em .* escreveu:', re.IGNORECASE),
|
|
@@ -67,6 +70,11 @@ def remove_quoted_text(soup):
|
|
| 67 |
for hr in soup.find_all('hr'):
|
| 68 |
hr.decompose()
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
return soup
|
| 71 |
|
| 72 |
def extract_latest_message_from_lines(lines):
|
|
|
|
| 13 |
re.compile(r'^On .* wrote:', re.IGNORECASE),
|
| 14 |
re.compile(r'^----\s?Original Message\s?----$', re.IGNORECASE),
|
| 15 |
re.compile(r'^Begin forwarded message:', re.IGNORECASE),
|
| 16 |
+
|
| 17 |
+
# Custom separators in email
|
| 18 |
+
re.compile(r'^-+.*-+$'), # Separator lines such as "----------------"; note: this also matches any line that both starts and ends with "-" (e.g. "- note -")
|
| 19 |
|
| 20 |
# Portuguese patterns
|
| 21 |
re.compile(r'^Em .* escreveu:', re.IGNORECASE),
|
|
|
|
| 70 |
for hr in soup.find_all('hr'):
|
| 71 |
hr.decompose()
|
| 72 |
|
| 73 |
+
# Remove tables with dotted borders (a typical marker of a previous conversation)
|
| 74 |
+
for table in soup.find_all('table'):
|
| 75 |
+
if 'border-top:1px dotted' in str(table):
|
| 76 |
+
table.decompose()
|
| 77 |
+
|
| 78 |
return soup
|
| 79 |
|
| 80 |
def extract_latest_message_from_lines(lines):
|