Spaces:

KJ24
/

Transcript_file_Drive

Sleeping

File size: 1,773 Bytes

297eccb

from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import os
from tempfile import NamedTemporaryFile
import pandas as pd
from docx import Document
import pdfplumber

# FastAPI application instance; the route handlers defined in this file
# register themselves on it through the @app.post / @app.get decorators.
app = FastAPI()

def extract_text_from_docx(file_path):
    """Return the text of a .docx document, one paragraph per line.

    Args:
        file_path: Location of the document; handed unchanged to ``Document``.

    Returns:
        The ``text`` of every entry in the document's ``paragraphs``,
        joined with newline characters.
    """
    document = Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)

def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        file_path: Location of the PDF; handed unchanged to ``pdfplumber.open``.

    Returns:
        The concatenation of each page's extracted text plus ``"\\n"``.
        A page for which ``extract_text()`` yields nothing contributes only
        the newline, so the number of newline terminators still matches the
        number of pages.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() can yield None for a page without a text layer
        # (e.g. a scanned image) on some pdfplumber versions; `or ""` keeps
        # the concatenation from raising TypeError on such pages.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # join instead of repeated `+=` so long documents are assembled in one pass.
    return "".join(f"{page_text}\n" for page_text in page_texts)

def extract_text_from_sheet(file_path):
    """Render a CSV or Excel file as plain text.

    Args:
        file_path: Location of the file, as ``str`` or ``os.PathLike``. A path
            whose suffix is ``.csv`` (compared case-insensitively) is read
            with ``pandas.read_csv``; anything else goes to
            ``pandas.read_excel``.

    Returns:
        The loaded table formatted by ``DataFrame.to_string(index=False)``.
    """
    # os.fspath lets pathlib.Path objects through; lower() makes "DATA.CSV"
    # take the CSV branch instead of being handed to the Excel reader.
    path_text = os.fspath(file_path)
    if path_text.lower().endswith(".csv"):
        frame = pd.read_csv(file_path)
    else:
        frame = pd.read_excel(file_path)
    return frame.to_string(index=False)

# Maps a lower-cased file extension to the function that extracts its text.
_EXTRACTORS = {
    ".docx": extract_text_from_docx,
    ".pdf": extract_text_from_pdf,
    ".csv": extract_text_from_sheet,
    ".xlsx": extract_text_from_sheet,
}


@app.post("/upload")
async def upload(file: UploadFile = File(...)):
    """Extract the text of an uploaded .docx, .pdf, .csv or .xlsx file.

    The upload is copied to a temporary file (the extractors take a path),
    parsed according to its file-name extension, and the temporary file is
    removed afterwards whether or not parsing succeeded.

    Args:
        file: The uploaded file from the multipart request body.

    Returns:
        On success, a dict with ``nom_fichier`` (the uploaded file name),
        ``type`` (the content type sent with the upload) and ``texte`` (the
        extracted text). A 400 ``JSONResponse`` when the extension is not
        supported (including a missing file name), or a 500 ``JSONResponse``
        carrying the exception message when writing or parsing fails.
    """
    # filename may be absent; treat that as "no extension" instead of letting
    # os.path.splitext raise TypeError on None.
    extension = os.path.splitext(file.filename or "")[1].lower()
    mime_type = file.content_type

    # Reject unsupported types before reading the body or touching the disk.
    extractor = _EXTRACTORS.get(extension)
    if extractor is None:
        return JSONResponse(
            status_code=400,
            content={"erreur": "Type de fichier non supporté"},
        )

    payload = await file.read()

    temp_path = None
    try:
        # delete=False because the extractors reopen the file by path; the
        # write sits inside the try so a failed write cannot leak the file.
        with NamedTemporaryFile(delete=False, suffix=extension) as temp_file:
            temp_path = temp_file.name
            temp_file.write(payload)
        # NOTE(review): the extractors are synchronous, so parsing runs on
        # the event loop inside this async handler.
        texte = extractor(temp_path)
    except Exception as e:
        return JSONResponse(status_code=500, content={"erreur": str(e)})
    finally:
        if temp_path is not None:
            os.remove(temp_path)

    return {
        "nom_fichier": file.filename,
        "type": mime_type,
        "texte": texte
    }

@app.get("/")
def ping():
    """Answer GET / with a fixed message payload."""
    status_payload = {"message": "L'API fonctionne ✅"}
    return status_payload