Spaces:

Corin1998
/

HFResumeIntakeSystem_DC

Paused

Corin1998 committed on Nov 23, 2025

Commit

1a1b2af

verified ·

1 Parent(s): 17716c9

Create utils.py

Files changed (1) hide show

pipelines/utils.py ADDED Viewed

+import io
+import docx
def detect_filetype(filename: str, file_bytes: bytes) -> str:
    """Classify an uploaded file by its name, falling back to a PDF signature check.

    The filename extension is tested first, case-insensitively. When the
    extension is not recognised, the first four bytes of the content are
    compared against the ``%PDF`` signature so that PDFs uploaded without
    a usable name are still detected.

    Args:
        filename: Name of the uploaded file. ``None`` or an empty string is
            treated as a name without an extension.
        file_bytes: Raw file content. ``None`` or empty content simply
            skips the signature check instead of raising.

    Returns:
        One of ``"pdf"``, ``"image"``, ``"docx"``, ``"txt"`` or ``"unknown"``.
    """
    fname = (filename or "").lower()

    # str.endswith accepts a tuple, so one call covers every image suffix.
    # ".tif" is the short spelling of ".tiff" and is treated the same way.
    image_suffixes = (".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp")

    if fname.endswith(".pdf"):
        return "pdf"
    if fname.endswith(image_suffixes):
        return "image"
    if fname.endswith(".docx"):
        return "docx"
    if fname.endswith(".txt"):
        return "txt"

    # The extension gave no answer: fall back to sniffing the content.
    # Guard against None/empty content so a missing payload yields
    # "unknown" rather than a TypeError from slicing None.
    if file_bytes and file_bytes[:4] == b"%PDF":
        return "pdf"
    return "unknown"
def load_doc_text(filetype: str, file_bytes: bytes) -> str:
    """Extract plain text from a DOCX or TXT payload.

    Args:
        filetype: File kind label; only ``"docx"`` and ``"txt"`` are
            handled here.
        file_bytes: Raw file content. ``None`` or empty content yields an
            empty string.

    Returns:
        The extracted text, or ``""`` when the file type is not handled by
        this function or there is no content to read.

    Raises:
        Whatever ``docx.Document`` raises when given non-empty bytes that
        are not a readable DOCX package; such errors are not caught here.
    """
    # Nothing to read: return the same empty result used for unsupported
    # types instead of failing on None.decode / an empty archive.
    if not file_bytes:
        return ""

    if filetype == "docx":
        stream = io.BytesIO(file_bytes)
        document = docx.Document(stream)
        # Only ``document.paragraphs`` is read; content that python-docx
        # exposes through other collections (e.g. ``document.tables``) is
        # not included in the result.
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    if filetype == "txt":
        # "utf-8-sig" decodes exactly like UTF-8 but drops a leading byte-order
        # mark, which would otherwise survive as a stray "\ufeff" at the start
        # of the text. Undecodable bytes are still skipped.
        return file_bytes.decode("utf-8-sig", errors="ignore")

    return ""