Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, UploadFile, File | |
| from fastapi.responses import JSONResponse | |
| import os | |
| from tempfile import NamedTemporaryFile | |
| import pandas as pd | |
| from docx import Document | |
| import pdfplumber | |
| app = FastAPI() | |
def extract_text_from_docx(file_path):
    """Return the text of every paragraph in a .docx file, one per line.

    Args:
        file_path: Location of the document, passed straight to ``Document``.

    Returns:
        The paragraph texts joined with newline characters.
    """
    document = Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        file_path: Location of the PDF, passed straight to ``pdfplumber.open``.

    Returns:
        The concatenated page texts; an empty string for a PDF with no pages.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() may yield None for a page without a text layer
        # (e.g. scanned/image-only pages on some pdfplumber versions); treat
        # that as an empty page instead of failing on `None + "\n"`.
        return "".join(f"{page.extract_text() or ''}\n" for page in pdf.pages)
def extract_text_from_sheet(file_path):
    """Render a CSV or Excel file as a plain-text table.

    Args:
        file_path: Path of the sheet, as ``str`` or ``os.PathLike``. A path
            ending in ``.csv`` (any letter case) is read with
            ``pandas.read_csv``; anything else is handed to
            ``pandas.read_excel``.

    Returns:
        The DataFrame formatted by ``DataFrame.to_string`` without the index
        column.
    """
    # Normalise once so that "DATA.CSV" and pathlib.Path inputs are routed to
    # the CSV reader instead of falling through to read_excel.
    is_csv = os.fspath(file_path).lower().endswith(".csv")
    df = pd.read_csv(file_path) if is_csv else pd.read_excel(file_path)
    return df.to_string(index=False)
async def upload(file: UploadFile = File(...)):
    """Extract the text of an uploaded .docx, .pdf, .csv or .xlsx file.

    The upload is copied to a temporary file (the extractors work on paths),
    handed to the extractor matching its extension, and the temporary file is
    always removed afterwards.

    NOTE(review): no route decorator is visible on this handler in this view;
    confirm it is registered on ``app``.

    Args:
        file: The uploaded file received from the multipart form.

    Returns:
        On success, a dict with the keys ``nom_fichier`` (original filename),
        ``type`` (content type reported by the client) and ``texte`` (the
        extracted text). A ``JSONResponse`` with status 400 when the extension
        is not supported, or status 500 carrying the error message when
        reading or extraction fails.
    """
    # The filename is optional in an upload; a missing one simply has no
    # extension and is rejected below instead of crashing splitext().
    extension = os.path.splitext(file.filename or "")[1].lower()
    mime_type = file.content_type

    extractors = {
        ".docx": extract_text_from_docx,
        ".pdf": extract_text_from_pdf,
        ".csv": extract_text_from_sheet,
        ".xlsx": extract_text_from_sheet,
    }
    extractor = extractors.get(extension)
    if extractor is None:
        # Reject before touching the disk: nothing to clean up in this case.
        return JSONResponse(status_code=400, content={"erreur": "Type de fichier non supporté"})

    temp_path = None
    try:
        # delete=False because the extractor reopens the file by path; the
        # `with` closes our handle first, the `finally` below removes the file.
        with NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
            temp_path = temp_file.name
            temp_file.write(await file.read())
        texte = extractor(temp_path)
    except Exception as e:
        return JSONResponse(status_code=500, content={"erreur": str(e)})
    finally:
        # Runs on success and on failure, including a failed read/write of
        # the upload itself, so the temporary file never leaks.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

    return {
        "nom_fichier": file.filename,
        "type": mime_type,
        "texte": texte,
    }
def ping():
    """Return a fixed status payload showing that the API is responding.

    NOTE(review): no route decorator is visible on this handler in this view;
    confirm it is registered on ``app``.
    """
    status = {"message": "L'API fonctionne ✅"}
    return status