# Page-header residue from the hosting site (not part of the program):
#   KJ24's picture / Create app.py / 297eccb verified
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import os
from tempfile import NamedTemporaryFile
import pandas as pd
from docx import Document
import pdfplumber
# Application instance; the @app.post / @app.get decorators in this file
# register their handlers on it.
app = FastAPI()
def extract_text_from_docx(file_path):
    """Return the paragraph text of a .docx file, one paragraph per line.

    Args:
        file_path: Location of the document; handed directly to ``Document``.

    Returns:
        The ``text`` of every entry in ``doc.paragraphs``, joined with
        newline characters (no trailing newline).
    """
    document = Document(file_path)
    paragraph_texts = []
    for paragraph in document.paragraphs:
        paragraph_texts.append(paragraph.text)
    return "\n".join(paragraph_texts)
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, each followed by a newline.

    Args:
        file_path: Location of the PDF; handed directly to ``pdfplumber.open``.

    Returns:
        The concatenation of each page's extracted text plus a newline
        character. A page that yields no text contributes just the newline.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() may yield None for a page without extractable text
        # (behaviour depends on the pdfplumber version); coalescing to ""
        # avoids a TypeError when appending the newline.
        page_texts = [(page.extract_text() or "") + "\n" for page in pdf.pages]
    return "".join(page_texts)
def extract_text_from_sheet(file_path):
    """Render a CSV or Excel file as a plain-text table.

    Args:
        file_path: Path (``str`` or ``os.PathLike``) of the spreadsheet. A
            name ending in ``.csv`` (any letter case) is parsed with
            ``pandas.read_csv``; anything else goes to ``pandas.read_excel``.

    Returns:
        The loaded ``DataFrame`` formatted with ``to_string(index=False)``.

    Raises:
        Whatever ``pandas.read_csv`` / ``pandas.read_excel`` raise for an
        unreadable or malformed file.
    """
    # Normalise before comparing so "DATA.CSV" or a pathlib.Path is not
    # misrouted to the Excel reader.
    is_csv = os.fspath(file_path).lower().endswith(".csv")
    df = pd.read_csv(file_path) if is_csv else pd.read_excel(file_path)
    return df.to_string(index=False)
@app.post("/upload")
async def upload(file: UploadFile = File(...)):
    """Extract the text of an uploaded .docx, .pdf, .csv or .xlsx file.

    The extractor is chosen from the lower-cased extension of the uploaded
    file name. The upload is copied to a temporary file, because the
    extractors work on paths, and that file is removed before returning.

    Args:
        file: The uploaded file from the multipart form.

    Returns:
        On success, a dict with ``nom_fichier`` (uploaded name), ``type``
        (the upload's content type) and ``texte`` (extracted text).
        A ``JSONResponse`` with status 400 and an ``erreur`` message when the
        extension is not supported (including a missing file name), or with
        status 500 and the exception text when reading or extraction fails.
    """
    # filename may be absent on the upload; treat that as "no extension".
    extension = os.path.splitext(file.filename or "")[1].lower()
    mime_type = file.content_type

    extractors = {
        ".docx": extract_text_from_docx,
        ".pdf": extract_text_from_pdf,
        ".csv": extract_text_from_sheet,
        ".xlsx": extract_text_from_sheet,
    }
    extractor = extractors.get(extension)
    if extractor is None:
        # Reject before touching the disk: nothing to write, nothing to clean.
        return JSONResponse(status_code=400, content={"erreur": "Type de fichier non supporté"})

    temp_path = None
    try:
        with NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
            # Record the path first so the finally clause can remove the file
            # even if reading or writing the upload fails.
            temp_path = temp_file.name
            temp_file.write(await file.read())
        # The file is closed here, so the extractor can reopen it by path.
        texte = extractor(temp_path)
    except Exception as e:
        return JSONResponse(status_code=500, content={"erreur": str(e)})
    finally:
        if temp_path is not None:
            os.remove(temp_path)

    return {
        "nom_fichier": file.filename,
        "type": mime_type,
        "texte": texte,
    }
@app.get("/")
def ping():
    """Answer GET / with a fixed message showing that the API is up."""
    status_payload = {"message": "L'API fonctionne ✅"}
    return status_payload