SHAFI committed on
Commit
2543980
·
1 Parent(s): c88a0c2

fix: add missing strip_html_if_needed utility for vector store

Browse files
Files changed (2) hide show
  1. app/utils/__init__.py +8 -0
  2. app/utils/helpers.py +23 -0
app/utils/__init__.py CHANGED
@@ -1 +1,9 @@
1
  """Utility functions"""
 
 
 
 
 
 
 
 
 
1
  """Utility functions"""
2
+
3
+ from .helpers import (
4
+ generate_id,
5
+ sanitize_filename,
6
+ format_datetime,
7
+ truncate_text,
8
+ strip_html_if_needed
9
+ )
app/utils/helpers.py CHANGED
@@ -23,3 +23,26 @@ def truncate_text(text: str, max_length: int = 200) -> str:
23
  if len(text) <= max_length:
24
  return text
25
  return text[:max_length-3] + "..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  if len(text) <= max_length:
24
  return text
25
  return text[:max_length-3] + "..."
26
+
27
+ import re
28
+
29
# Matches one tag-like span: '<', any run of characters that are not '>'
# (newlines included), then '>'. A negated character class is used instead
# of '.*?' because '.' does not match '\n' by default, which left tags whose
# attributes wrap across lines in the output.
_HTML_TAG_RE = re.compile(r"<[^>]*>")


def strip_html_if_needed(text: str) -> str:
    """
    Remove HTML tags from text if present.

    Only the tag markup itself is removed; text between tags is kept and
    HTML entities (e.g. ``&amp;``) are left untouched.

    Args:
        text: Input text that might contain HTML

    Returns:
        The text with every ``<...>`` span removed, the input unchanged when
        it contains no ``<`` / ``>`` pair, or an empty string when the input
        is empty or not a string.
    """
    if not text or not isinstance(text, str):
        return ""

    # Cheap pre-check: without both angle brackets there is nothing to strip.
    if "<" not in text or ">" not in text:
        return text

    return _HTML_TAG_RE.sub("", text)