"""Small FastAPI service that extracts plain text from uploaded documents.

Supported upload types are ``.docx``, ``.pdf``, ``.csv`` and ``.xlsx``.
"""

import os
from tempfile import NamedTemporaryFile

import pandas as pd
import pdfplumber
from docx import Document
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse

app = FastAPI()

# Extensions (lower-case, with leading dot) that ``upload`` knows how to parse.
_SUPPORTED_EXTENSIONS = frozenset({".docx", ".pdf", ".csv", ".xlsx"})


def extract_text_from_docx(file_path: str) -> str:
    """Return the text of every paragraph in a .docx file, newline-separated."""
    doc = Document(file_path)
    return "\n".join(p.text for p in doc.paragraphs)


def extract_text_from_pdf(file_path: str) -> str:
    """Return the text of every page of a PDF, each page followed by a newline.

    A page for which ``extract_text()`` yields ``None`` contributes an empty
    string instead of raising ``TypeError`` on concatenation.
    """
    with pdfplumber.open(file_path) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "".join(f"{page_text}\n" for page_text in page_texts)


def extract_text_from_sheet(file_path: str) -> str:
    """Return a CSV or Excel sheet rendered as text, without the index column.

    Paths ending in ``.csv`` (compared case-insensitively) are read with
    ``pandas.read_csv``; every other path is handed to ``pandas.read_excel``.
    """
    if file_path.lower().endswith(".csv"):
        df = pd.read_csv(file_path)
    else:
        df = pd.read_excel(file_path)
    return df.to_string(index=False)


@app.post("/upload")
async def upload(file: UploadFile = File(...)):
    """Extract the text content of an uploaded document.

    The parser is chosen from the (lower-cased) extension of the uploaded
    file name. The upload is spooled to a temporary file because the
    extractors take a file path.

    Returns:
        On success, a dict with the keys ``nom_fichier``, ``type`` and
        ``texte``. A 400 ``JSONResponse`` when the extension is not
        supported, or a 500 ``JSONResponse`` carrying the exception message
        when reading, spooling or parsing the upload fails.
    """
    # ``filename`` can be None; treat that as "no extension".
    extension = os.path.splitext(file.filename or "")[1].lower()
    mime_type = file.content_type

    # Reject unsupported types before reading the upload or touching the disk.
    if extension not in _SUPPORTED_EXTENSIONS:
        return JSONResponse(
            status_code=400,
            content={"erreur": "Type de fichier non supporté"},
        )

    temp_path = None
    try:
        # delete=False so the file survives the ``with`` and can be reopened
        # by path; it is removed in the ``finally`` below.
        with NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
            temp_path = temp_file.name
            temp_file.write(await file.read())

        # NOTE: the extractors are synchronous and run on the event loop.
        if extension == ".docx":
            texte = extract_text_from_docx(temp_path)
        elif extension == ".pdf":
            texte = extract_text_from_pdf(temp_path)
        else:  # ".csv" or ".xlsx"
            texte = extract_text_from_sheet(temp_path)
    except Exception as exc:
        # Top-level boundary: report any failure to the client as a 500.
        return JSONResponse(status_code=500, content={"erreur": str(exc)})
    finally:
        # temp_path stays None if the temporary file could not be created.
        if temp_path is not None:
            os.remove(temp_path)

    return {
        "nom_fichier": file.filename,
        "type": mime_type,
        "texte": texte,
    }


@app.get("/")
def ping():
    """Health-check endpoint: return a fixed confirmation message."""
    return {"message": "L'API fonctionne ✅"}