Spaces:
Sleeping
Sleeping
Update services/kb_creation.py
Browse files- services/kb_creation.py +4 -5
services/kb_creation.py
CHANGED
|
@@ -7,10 +7,9 @@
|
|
| 7 |
import os
|
| 8 |
import re
|
| 9 |
import pickle
|
| 10 |
-
from typing import TYPE_CHECKING, Any, List, Dict,
|
| 11 |
|
| 12 |
-
# During type checking (Pylance/mypy)
|
| 13 |
-
# At runtime (or if docx is missing), we fallback to Any to avoid import issues.
|
| 14 |
if TYPE_CHECKING:
|
| 15 |
from docx import Document as DocxDocument
|
| 16 |
else:
|
|
@@ -22,7 +21,7 @@ try:
|
|
| 22 |
except Exception:
|
| 23 |
Document = None # type: ignore
|
| 24 |
|
| 25 |
-
#
|
| 26 |
import chromadb # type: ignore
|
| 27 |
from sentence_transformers import SentenceTransformer # type: ignore
|
| 28 |
|
|
@@ -58,7 +57,7 @@ def _tokenize_meta_value(val: Optional[str]) -> List[str]:
|
|
| 58 |
return _tokenize(val or "")
|
| 59 |
|
| 60 |
# --------------------------- DOCX parsing & chunking ---------------------------
|
| 61 |
-
BULLET_RE = re.compile(r"^\s*(?:[\-\*\u2022]|\d+[
|
| 62 |
|
| 63 |
SECTION_KEYWORDS = (
|
| 64 |
"overview", "introduction", "purpose",
|
|
|
|
| 7 |
import os
|
| 8 |
import re
|
| 9 |
import pickle
|
| 10 |
+
from typing import TYPE_CHECKING, Any, List, Dict, Tuple, Optional
|
| 11 |
|
| 12 |
+
# During type checking (Pylance/mypy), import the real type; at runtime, fall back to Any.
|
|
|
|
| 13 |
if TYPE_CHECKING:
|
| 14 |
from docx import Document as DocxDocument
|
| 15 |
else:
|
|
|
|
| 21 |
except Exception:
|
| 22 |
Document = None # type: ignore
|
| 23 |
|
| 24 |
+
# External libs assumed available in your runtime.
|
| 25 |
import chromadb # type: ignore
|
| 26 |
from sentence_transformers import SentenceTransformer # type: ignore
|
| 27 |
|
|
|
|
| 57 |
return _tokenize(val or "")
|
| 58 |
|
| 59 |
# --------------------------- DOCX parsing & chunking ---------------------------
|
| 60 |
+
BULLET_RE = re.compile(r"^\s*(?:[\-\*\u2022]|\d+[.\)])\s+", re.IGNORECASE)
|
| 61 |
|
| 62 |
SECTION_KEYWORDS = (
|
| 63 |
"overview", "introduction", "purpose",
|