Spaces:
Runtime error
Runtime error
Added guards to prevent parsing a missing (None) HTML string.
Browse files
functions/helper_functions.py
CHANGED
|
@@ -209,6 +209,9 @@ def get_html(url: str) -> str:
|
|
| 209 |
|
| 210 |
content = content.decode(encoding)
|
| 211 |
|
|
|
|
|
|
|
|
|
|
| 212 |
except HTTPError:
|
| 213 |
content = None
|
| 214 |
|
|
@@ -227,6 +230,9 @@ def get_text(html: str) -> str:
|
|
| 227 |
|
| 228 |
Returns:
|
| 229 |
Cleaned text string'''
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
extractor = extractors.ArticleExtractor()
|
| 232 |
|
|
@@ -236,6 +242,11 @@ def get_text(html: str) -> str:
|
|
| 236 |
except HTMLExtractionError:
|
| 237 |
pass
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
|
| 240 |
return clean_html(html)
|
| 241 |
|
|
|
|
| 209 |
|
| 210 |
content = content.decode(encoding)
|
| 211 |
|
| 212 |
+
else:
|
| 213 |
+
content = None
|
| 214 |
+
|
| 215 |
except HTTPError:
|
| 216 |
content = None
|
| 217 |
|
|
|
|
| 230 |
|
| 231 |
Returns:
|
| 232 |
Cleaned text string'''
|
| 233 |
+
|
| 234 |
+
if html is None:
|
| 235 |
+
return None
|
| 236 |
|
| 237 |
extractor = extractors.ArticleExtractor()
|
| 238 |
|
|
|
|
| 242 |
except HTMLExtractionError:
|
| 243 |
pass
|
| 244 |
|
| 245 |
+
except AttributeError:
|
| 246 |
+
pass
|
| 247 |
+
|
| 248 |
+
except TypeError:
|
| 249 |
+
pass
|
| 250 |
|
| 251 |
return clean_html(html)
|
| 252 |
|