Corin1998 committed on
Commit
c8b6d3b
·
verified ·
1 Parent(s): 89620fa

Create utils.py

Browse files
Files changed (1) hide show
  1. pipelines/utils.py +27 -0
pipelines/utils.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import io

import docx
3
+
4
def detect_filetype(filename: str, file_btes: bytes) -> str:
    """Classify a file as "pdf", "image", "docx", "text" or "unknown".

    The filename extension is checked first, case-insensitively. When the
    extension is not recognised, the leading bytes of the content are
    compared against the PDF signature ``%PDF``.

    Args:
        filename: Original file name; ``None`` or an empty string is treated
            as having no extension.
        file_btes: Raw file content. The (misspelt) parameter name is kept
            as-is so existing keyword callers keep working. ``None`` or empty
            content never matches the PDF signature.

    Returns:
        One of ``"pdf"``, ``"image"``, ``"docx"``, ``"text"``, ``"unknown"``.
    """
    fname = (filename or "").lower()
    if fname.endswith(".pdf"):
        return "pdf"
    if fname.endswith((".png", ".jpg", ".jpeg", ".tiff", ".bmp")):
        return "image"
    if fname.endswith(".docx"):
        return "docx"
    if fname.endswith(".txt"):
        return "text"
    # Fall back to content sniffing: PDF files begin with the bytes "%PDF".
    if (file_btes or b"")[:4] == b"%PDF":
        return "pdf"
    return "unknown"
17
+
18
def load_text(filetype: str, file_bytes: bytes) -> str:
    """Extract plain text from file content of a known type.

    Args:
        filetype: Type label; ``"docx"`` and ``"text"`` are handled here.
        file_bytes: Raw file content.

    Returns:
        For ``"docx"``, the paragraph texts joined with newlines. For
        ``"text"``, the content decoded as UTF-8 with undecodable bytes
        dropped. For any other type, an empty string.
    """
    if filetype == "docx":
        # python-docx is imported lazily so the other branches work even
        # when the package is not installed.
        import docx

        doc = docx.Document(io.BytesIO(file_bytes))
        return "\n".join(p.text for p in doc.paragraphs)
    if filetype == "text":
        return file_bytes.decode("utf-8", errors="ignore")
    # Anything else is routed to the OpenAI side by the caller.
    return ""