KJ24 committed on
Commit
2d8097b
·
verified ·
1 Parent(s): 424e028

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ from fastapi.responses import JSONResponse
3
+ import mimetypes
4
+ import os
5
+ from typing import Optional
6
+
7
+ from docx import Document
8
+ import pdfplumber
9
+ import pandas as pd
10
+ from tempfile import NamedTemporaryFile
11
+
12
# Application object; the /upload route is registered on it via @app.post.
app = FastAPI()
13
+
14
def extract_text_from_docx(file_path):
    """Return the text of the paragraphs of a .docx file, one per line.

    Args:
        file_path: Path to the .docx document to read.

    Returns:
        The text of each entry in the document's ``paragraphs`` collection,
        joined with newline characters.
    """
    doc = Document(file_path)
    return "\n".join(para.text for para in doc.paragraphs)
17
+
18
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, each followed by a newline.

    Args:
        file_path: Path to the PDF file to read.

    Returns:
        The concatenated page texts. A page for which no text is extracted
        contributes only its trailing newline.
    """
    with pdfplumber.open(file_path) as pdf:
        # NOTE(review): some pdfplumber versions are reported to return None
        # from extract_text() for pages without text (e.g. scanned images);
        # falling back to "" keeps the concatenation from raising TypeError.
        return "".join(
            (page.extract_text() or "") + "\n" for page in pdf.pages
        )
24
+
25
def extract_text_from_sheet(file_path):
    """Render a CSV or Excel file as a plain-text table.

    Args:
        file_path: Path to the spreadsheet. A ``.csv`` extension, in any
            letter case, selects ``pandas.read_csv``; every other extension
            is handed to ``pandas.read_excel``.

    Returns:
        The table formatted by ``DataFrame.to_string`` with the index
        column omitted.
    """
    # Compare case-insensitively so "DATA.CSV" is not sent to the Excel reader.
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".csv":
        df = pd.read_csv(file_path)
    else:
        df = pd.read_excel(file_path)
    return df.to_string(index=False)
32
+
33
@app.post("/upload")
async def upload(file: UploadFile = File(...)):
    """Extract the text content of an uploaded .docx, .pdf, .csv or .xlsx file.

    The extension of the uploaded file name selects the extractor. Supported
    uploads are written to a temporary file, parsed, and the temporary file
    is removed whether or not parsing succeeds.

    Args:
        file: The uploaded file taken from the request.

    Returns:
        On success, a dict with the uploaded file name ("nom_fichier"), the
        declared content type ("type") and the extracted text ("texte").
        A 400 JSONResponse when the extension is not supported, or a 500
        JSONResponse carrying the exception message when reading or parsing
        the upload fails.
    """
    # Guard against a missing file name so splitext() is never handed None.
    extension = os.path.splitext(file.filename or "")[1].lower()
    mime_type = file.content_type

    # Pick the extractor first so unsupported uploads are rejected before
    # anything is written to disk.
    if extension == ".docx":
        extractor = extract_text_from_docx
    elif extension == ".pdf":
        extractor = extract_text_from_pdf
    elif extension in (".csv", ".xlsx"):
        extractor = extract_text_from_sheet
    else:
        return JSONResponse(status_code=400, content={"erreur": "Type de fichier non supporté"})

    temp_path = None
    try:
        # delete=False: the extractors reopen the file by path, so it must
        # outlive this handle; the finally clause below removes it.
        with NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
            temp_path = temp_file.name
            temp_file.write(await file.read())
        texte = extractor(temp_path)
    except Exception as e:
        return JSONResponse(status_code=500, content={"erreur": str(e)})
    finally:
        # Runs on every path once the file exists, including a failed write.
        if temp_path is not None:
            os.remove(temp_path)

    return {
        "nom_fichier": file.filename,
        "type": mime_type,
        "texte": texte,
    }