Spaces:
Runtime error
Runtime error
File size: 880 Bytes
a141462 c8b6d3b a141462 c8b6d3b a141462 c8b6d3b a141462 c8b6d3b a141462 c8b6d3b a141462 c8b6d3b a141462 c8b6d3b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# pipelines/utils.py
import io
import docx
def detect_filetype(filename: str, file_bytes: bytes) -> str:
    """Classify a file as "pdf", "image", "docx", "txt" or "unknown".

    The filename extension is checked first, case-insensitively. If the
    extension is not recognised, the leading bytes of the content are
    compared against the PDF magic number ``%PDF``.

    Args:
        filename: Name of the file. ``None`` or ``""`` is treated as a name
            without any extension.
        file_bytes: Raw file content. ``None`` or empty content is treated
            as having no magic number.

    Returns:
        One of ``"pdf"``, ``"image"``, ``"docx"``, ``"txt"`` or ``"unknown"``.
    """
    fname = (filename or "").lower()
    if fname.endswith(".pdf"):
        return "pdf"
    # str.endswith accepts a tuple, so one call covers every image suffix.
    if fname.endswith((".png", ".jpg", ".jpeg", ".tiff", ".bmp")):
        return "image"
    if fname.endswith(".docx"):
        return "docx"
    if fname.endswith(".txt"):
        return "txt"
    # Extension was not recognised: fall back to sniffing the PDF magic
    # number. `or b""` keeps a missing payload from raising, mirroring the
    # guard applied to `filename` above.
    if (file_bytes or b"")[:4] == b"%PDF":
        return "pdf"
    return "unknown"
def load_doc_text(filetype: str, file_bytes: bytes) -> str:
    """Extract plain text from a docx or txt payload.

    Args:
        filetype: File type label; only ``"docx"`` and ``"txt"`` are
            handled here.
        file_bytes: Raw file content.

    Returns:
        For ``"docx"``, the text of every paragraph joined with newlines.
        For ``"txt"``, the content decoded as UTF-8 with undecodable bytes
        dropped and a leading byte-order mark removed. For any other type,
        an empty string.
    """
    if filetype == "docx":
        document = docx.Document(io.BytesIO(file_bytes))
        return "\n".join(p.text for p in document.paragraphs)
    if filetype == "txt":
        # "utf-8-sig" decodes exactly like "utf-8" but strips a leading BOM,
        # so text saved with a BOM does not start with a stray U+FEFF.
        return file_bytes.decode("utf-8-sig", errors="ignore")
    # Anything else is routed to the OpenAI side by the upstream caller.
    return ""
|