Spaces:
Sleeping
Sleeping
File size: 1,773 Bytes
297eccb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import os
from tempfile import NamedTemporaryFile
import pandas as pd
from docx import Document
import pdfplumber
# FastAPI application instance; the route handlers in this file are
# registered on it through the @app.post / @app.get decorators.
app = FastAPI()
def extract_text_from_docx(file_path):
    """Return the text of a .docx file, one paragraph per line.

    Opens *file_path* with ``Document`` and joins the ``text`` of each entry
    in ``paragraphs`` with newline characters.
    """
    document = Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        file_path: Path of the PDF file to open with ``pdfplumber``.

    Returns:
        The page texts concatenated in page order. A page that yields no
        text contributes only its trailing newline.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() can return None for a page without extractable text
        # (reported for older pdfplumber releases - confirm for the pinned
        # version). Fall back to "" so such a page does not raise TypeError
        # on None + str.
        return "".join(
            (page.extract_text() or "") + "\n" for page in pdf.pages
        )
def extract_text_from_sheet(file_path: str) -> str:
    """Return the contents of a CSV or Excel file as a plain-text table.

    Args:
        file_path: Path of the spreadsheet. A path ending in ``.csv``
            (matched case-insensitively) is read with ``pd.read_csv``;
            any other path is handed to ``pd.read_excel``.

    Returns:
        The frame rendered by ``DataFrame.to_string`` without the index column.
    """
    # Compare on the lower-cased path so "DATA.CSV" is not mistakenly sent
    # to the Excel reader.
    if file_path.lower().endswith(".csv"):
        frame = pd.read_csv(file_path)
    else:
        frame = pd.read_excel(file_path)
    return frame.to_string(index=False)
@app.post("/upload")
async def upload(file: UploadFile = File(...)):
    """Extract the text of an uploaded .docx, .pdf, .csv or .xlsx file.

    The upload is copied to a temporary file (the extractors work on paths),
    passed to the extractor matching its extension, and the temporary file is
    removed afterwards whether or not extraction succeeded.

    Args:
        file: The uploaded file from the multipart request.

    Returns:
        On success, a dict with the original file name (``nom_fichier``), the
        content type declared by the client (``type``) and the extracted text
        (``texte``). A 400 ``JSONResponse`` when the extension is missing or
        unsupported, and a 500 ``JSONResponse`` carrying the error message
        when storing or parsing the file fails.
    """
    # Extension -> extractor dispatch table.
    extractors = {
        ".docx": extract_text_from_docx,
        ".pdf": extract_text_from_pdf,
        ".csv": extract_text_from_sheet,
        ".xlsx": extract_text_from_sheet,
    }

    # filename may be None; treat that as "no extension" instead of crashing.
    extension = os.path.splitext(file.filename or "")[1].lower()
    mime_type = file.content_type

    # Reject unsupported types before reading the body or touching the disk.
    extractor = extractors.get(extension)
    if extractor is None:
        return JSONResponse(status_code=400, content={"erreur": "Type de fichier non supporté"})

    temp_path = None
    try:
        # delete=False: the file must outlive the `with` so the extractor can
        # reopen it by path; the `finally` below removes it.
        with NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
            temp_path = temp_file.name
            temp_file.write(await file.read())
        # NOTE(review): the extractors are synchronous and run directly in
        # this coroutine; large files will block other requests meanwhile.
        texte = extractor(temp_path)
    except Exception as e:
        return JSONResponse(status_code=500, content={"erreur": str(e)})
    finally:
        # Clean up even when reading the upload or parsing it failed.
        if temp_path is not None:
            os.remove(temp_path)

    return {
        "nom_fichier": file.filename,
        "type": mime_type,
        "texte": texte,
    }
@app.get("/")
def ping():
    """Liveness endpoint: return a fixed message showing the API is reachable."""
    status = dict(message="L'API fonctionne ✅")
    return status
|