cours_nsi_term / utils.py
dav74's picture
Upload 22 files
8cec716 verified
import os
import shutil
import tempfile

from fastapi import UploadFile
from langchain_community.document_loaders import UnstructuredMarkdownLoader, PyPDFLoader
async def process_file(file: UploadFile) -> str:
    """
    Save the uploaded file temporarily, load its content based on extension,
    and return the text content.

    The upload is written into a private temporary directory under a fixed
    name; the client-supplied filename is only inspected to choose a loader
    and never becomes part of a filesystem path.

    Args:
        file: The uploaded file. Its ``filename`` selects the loader
            (``.md`` -> UnstructuredMarkdownLoader, ``.pdf`` -> PyPDFLoader,
            anything else is read as UTF-8 text) and its ``file`` stream
            supplies the bytes.

    Returns:
        The extracted text (documents joined with blank lines for ``.md`` and
        ``.pdf``), or the string ``"Error processing file."`` if loading or
        decoding fails.

    Raises:
        OSError: If the temporary copy of the upload cannot be written.
    """
    # Match the extension case-insensitively so "NOTES.PDF" is handled like
    # "notes.pdf". A missing filename falls through to the text fallback.
    lowered_name = (file.filename or "").lower()
    if lowered_name.endswith(".md"):
        suffix = ".md"
    elif lowered_name.endswith(".pdf"):
        suffix = ".pdf"
    else:
        suffix = ""

    content = ""
    # A fresh directory per call avoids collisions between concurrent uploads
    # that share a name, and guarantees cleanup even if the write fails.
    # NOTE: the file operations below are synchronous calls inside an
    # async function.
    with tempfile.TemporaryDirectory(prefix="temp_") as temp_dir:
        # Only a whitelisted suffix reaches the path, so separators or ".."
        # in the uploaded name cannot redirect the write.
        temp_path = os.path.join(temp_dir, f"upload{suffix}")
        with open(temp_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        try:
            if suffix == ".md":
                docs = UnstructuredMarkdownLoader(temp_path).load()
                content = "\n\n".join(d.page_content for d in docs)
            elif suffix == ".pdf":
                docs = PyPDFLoader(temp_path).load()
                content = "\n\n".join(d.page_content for d in docs)
            else:
                # Fallback for text files
                with open(temp_path, "r", encoding="utf-8") as f:
                    content = f.read()
        except Exception as e:
            # Best-effort contract: report the failure and hand the caller a
            # sentinel string instead of propagating loader/decoding errors.
            print(f"Error processing file: {e}")
            content = "Error processing file."

    return content