Spaces:

KJ24
/

Transcript_file_Drive

Sleeping

Transcript_file_Drive / app.py

Create app.py

297eccb verified 9 months ago

1.77 kB

	from fastapi import FastAPI, UploadFile, File
	from fastapi.responses import JSONResponse
	import os
	from tempfile import NamedTemporaryFile
	import pandas as pd
	from docx import Document
	import pdfplumber

	# Application instance; the route decorators further down register their handlers on it.
	app = FastAPI()

	def extract_text_from_docx(file_path):
	    """Return the text of a .docx file, one paragraph per line.

	    The document is opened with ``Document`` and the ``text`` of each entry
	    in its ``paragraphs`` collection is joined with newline characters.
	    """
	    document = Document(file_path)
	    return "\n".join(paragraph.text for paragraph in document.paragraphs)

	def extract_text_from_pdf(file_path) -> str:
	    """Return the text of every page of a PDF, each page followed by a newline.

	    Args:
	        file_path: Path of the PDF file to open with pdfplumber.

	    Returns:
	        The concatenated page texts. A page without extractable text
	        contributes only its trailing newline instead of raising.
	    """
	    with pdfplumber.open(file_path) as pdf:
	        # Depending on the pdfplumber release, extract_text() yields None or ""
	        # for a page with no text; `or ""` keeps the concatenation from
	        # failing with a TypeError in the None case.
	        # join() also avoids rebuilding the string once per page.
	        return "".join((page.extract_text() or "") + "\n" for page in pdf.pages)

	def extract_text_from_sheet(file_path) -> str:
	    """Return the contents of a CSV or Excel file rendered as a text table.

	    Args:
	        file_path: Path of the spreadsheet (string or path-like). A name
	            ending in ``.csv`` (any letter case) is read with
	            ``pandas.read_csv``; anything else goes to ``pandas.read_excel``.

	    Returns:
	        The table formatted by ``DataFrame.to_string`` without the index.
	    """
	    # Compare case-insensitively and via str() so "DATA.CSV" or a Path object
	    # is not mistakenly routed to the Excel reader.
	    is_csv = str(file_path).lower().endswith(".csv")
	    reader = pd.read_csv if is_csv else pd.read_excel
	    return reader(file_path).to_string(index=False)

	@app.post("/upload")
	async def upload(file: UploadFile = File(...)):
	    """Extract the text of an uploaded .docx, .pdf, .csv or .xlsx file.

	    The upload is copied to a temporary file, handed to the extractor that
	    matches its extension, and the temporary file is removed afterwards.

	    Returns:
	        On success, a dict with the original file name (``nom_fichier``),
	        the declared content type (``type``) and the extracted text
	        (``texte``). A 400 JSON response for an unsupported extension and a
	        500 JSON response carrying the error message if processing fails.
	    """
	    # The multipart part may carry no filename; treat that as "no extension"
	    # so the request is rejected with a 400 instead of crashing in splitext().
	    extension = os.path.splitext(file.filename or "")[1].lower()
	    mime_type = file.content_type

	    extractors = {
	        ".docx": extract_text_from_docx,
	        ".pdf": extract_text_from_pdf,
	        ".csv": extract_text_from_sheet,
	        ".xlsx": extract_text_from_sheet,
	    }
	    extractor = extractors.get(extension)
	    if extractor is None:
	        # Reject before touching the disk: nothing to clean up on this path.
	        return JSONResponse(status_code=400, content={"erreur": "Type de fichier non supporté"})

	    temp_path = None
	    try:
	        # Creation and writing sit inside the try so that a failed read or
	        # write still reaches the cleanup below (delete=False keeps the file).
	        with NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
	            temp_path = temp_file.name
	            temp_file.write(await file.read())
	        # NOTE(review): the extractors are synchronous and are called directly
	        # from this coroutine; consider a thread pool for large documents.
	        texte = extractor(temp_path)
	    except Exception as e:
	        return JSONResponse(status_code=500, content={"erreur": str(e)})
	    finally:
	        if temp_path is not None:
	            os.remove(temp_path)

	    return {
	        "nom_fichier": file.filename,
	        "type": mime_type,
	        "texte": texte,
	    }

	@app.get("/")
	def ping():
	    """Health-check endpoint: return a fixed confirmation message."""
	    payload = {"message": "L'API fonctionne ✅"}
	    return payload