cours_nsi_term

Running

cours_nsi_term / utils.py

Upload 22 files

8cec716 verified 2 months ago

1.27 kB

	import os
	from langchain_community.document_loaders import UnstructuredMarkdownLoader, PyPDFLoader
	from fastapi import UploadFile
	import shutil

	async def process_file(file: UploadFile) -> str:
	    """
	    Extract the text content of an uploaded file.

	    The upload is copied to a uniquely named temporary file, read with a
	    loader chosen from the file extension, and the temporary file is removed
	    before returning.

	    Args:
	        file: The uploaded file. Only its ``filename`` (to pick the
	            extension) and its ``file`` stream (to read the bytes) are used.

	    Returns:
	        The extracted text: Markdown and PDF documents are loaded through
	        their LangChain loaders and their pages joined with blank lines;
	        anything else is read as UTF-8 text. If loading fails, the literal
	        string ``"Error processing file."`` is returned instead.

	    Raises:
	        OSError: If the temporary copy of the upload cannot be written.
	    """
	    import tempfile

	    # The client controls ``file.filename``; never use it as a path. Keep only
	    # its extension (lower-cased so ".PDF" / ".MD" are recognised too).
	    suffix = os.path.splitext(file.filename or "")[1].lower()

	    content = ""
	    temp_filename = None
	    try:
	        # A unique name avoids collisions between concurrent uploads that
	        # share a filename. delete=False lets the loaders reopen it by path.
	        with tempfile.NamedTemporaryFile(
	            mode="wb", prefix="temp_", suffix=suffix, delete=False
	        ) as buffer:
	            temp_filename = buffer.name
	            shutil.copyfileobj(file.file, buffer)

	        try:
	            if suffix == ".md":
	                docs = UnstructuredMarkdownLoader(temp_filename).load()
	                content = "\n\n".join(d.page_content for d in docs)
	            elif suffix == ".pdf":
	                docs = PyPDFLoader(temp_filename).load()
	                content = "\n\n".join(d.page_content for d in docs)
	            else:
	                # Fallback for text files
	                with open(temp_filename, "r", encoding="utf-8") as f:
	                    content = f.read()
	        except Exception as e:
	            # Best-effort contract: callers get a placeholder string rather
	            # than an exception when a document cannot be parsed.
	            print(f"Error processing file: {e}")
	            content = "Error processing file."
	    finally:
	        # Runs even if writing the temporary copy failed, so nothing leaks.
	        if temp_filename is not None and os.path.exists(temp_filename):
	            os.remove(temp_filename)

	    return content