SHAFI committed on
Commit ·
2543980
1
Parent(s): c88a0c2
fix: add missing strip_html_if_needed utility for vector store
Browse files- app/utils/__init__.py +8 -0
- app/utils/helpers.py +23 -0
app/utils/__init__.py
CHANGED
|
@@ -1 +1,9 @@
|
|
| 1 |
"""Utility functions"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""Utility functions"""
|
| 2 |
+
|
| 3 |
+
from .helpers import (
|
| 4 |
+
generate_id,
|
| 5 |
+
sanitize_filename,
|
| 6 |
+
format_datetime,
|
| 7 |
+
truncate_text,
|
| 8 |
+
strip_html_if_needed
|
| 9 |
+
)
|
app/utils/helpers.py
CHANGED
|
@@ -23,3 +23,26 @@ def truncate_text(text: str, max_length: int = 200) -> str:
|
|
| 23 |
if len(text) <= max_length:
|
| 24 |
return text
|
| 25 |
return text[:max_length-3] + "..."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
if len(text) <= max_length:
|
| 24 |
return text
|
| 25 |
return text[:max_length-3] + "..."
|
| 26 |
+
|
| 27 |
+
import re
|
| 28 |
+
|
| 29 |
+
# Matches everything from "<" up to the next ">". A negated character class
# is used instead of ".*?" so that tags wrapped across several lines are
# matched too ("." does not match newlines unless re.DOTALL is set).
_HTML_TAG_RE = re.compile(r"<[^>]*>")


def strip_html_if_needed(text: str) -> str:
    """Remove HTML tags from text if present.

    Args:
        text: Input text that might contain HTML.

    Returns:
        The text with every ``<...>`` span removed, including spans that
        contain line breaks. An empty string is returned when ``text`` is
        empty or is not a ``str``.
    """
    if not text or not isinstance(text, str):
        return ""

    # Cheap pre-check: without both "<" and ">" no tag can be present,
    # so the input is returned untouched.
    if "<" not in text or ">" not in text:
        return text

    return _HTML_TAG_RE.sub("", text)
|