File size: 1,087 Bytes
1aeb427 d36be01 1aeb427 d36be01 1aeb427 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import os
import subprocess as sp
import tempfile
from pathlib import Path
from fastapi import FastAPI
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
import shutil
# Application instance; the route decorators further down register on it.
app = FastAPI()

# Install CORS handling: any origin is accepted, only POST is listed as an
# allowed method, and any request header is allowed.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins/headers with allow_credentials=True -- confirm whether
# credentialed cross-origin requests are actually needed here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["POST"],
    allow_headers=["*"],
)
def extract_text(pdf_file):
    """
    Generate a text rendering of a PDF file using the ``pdftotext`` tool.

    The tool is invoked with ``-layout`` and told to write to standard
    output (``-``); whatever it prints is captured and returned.

    Args:
        pdf_file: Path of the PDF file to convert.

    Returns:
        The complete text rendering as a single string (callers that want
        individual lines can use ``str.splitlines``).

    Raises:
        subprocess.CalledProcessError: If ``pdftotext`` exits with a
            non-zero status (its stderr is discarded).
        FileNotFoundError: If the ``pdftotext`` executable cannot be found.
    """
    # Request UTF-8 output explicitly and decode it as UTF-8, instead of
    # relying on the tool's build-specific default encoding happening to
    # match the locale encoding that ``text=True`` would decode with.
    args = ['pdftotext', '-layout', '-enc', 'UTF-8', pdf_file, '-']
    cp = sp.run(
        args, stdout=sp.PIPE, stderr=sp.DEVNULL,
        check=True, encoding='utf-8'
    )
    return cp.stdout
def save_and_export(pdf_file: UploadFile):
    """
    Spool an uploaded PDF to a temporary file and return its extracted text.

    Args:
        pdf_file: The upload whose underlying file object (``.file``) is
            copied to disk.

    Returns:
        The string produced by ``extract_text`` for the temporary copy.

    Raises:
        Whatever ``extract_text`` raises when the conversion fails.
    """
    with tempfile.NamedTemporaryFile() as tmp_file:
        shutil.copyfileobj(pdf_file.file, tmp_file)
        # The converter opens the file by name in a separate process, so any
        # bytes still sitting in Python's write buffer must reach the
        # filesystem first; otherwise it may see a truncated PDF.
        tmp_file.flush()
        # Must run inside the ``with``: the file is deleted on exit.
        resulting_text = extract_text(tmp_file.name)
    return resulting_text
@app.post('/extract_text')
async def app_extract_text(pdf_file: UploadFile) -> str:
    """
    Handle ``POST /extract_text``: convert the uploaded PDF to text.

    Args:
        pdf_file: The uploaded PDF.

    Returns:
        The text returned by ``save_and_export`` for the upload.
    """
    # Function-scope import keeps this handler self-contained.
    from fastapi.concurrency import run_in_threadpool

    # save_and_export performs blocking file I/O and waits on a subprocess;
    # calling it directly from a coroutine would stall the event loop for
    # every other request, so it is run on a worker thread instead.
    return await run_in_threadpool(save_and_export, pdf_file)
@app.get("/")
def read_root():
    """Handle ``GET /`` by returning a fixed greeting payload."""
    greeting = {"Hello": "World"}
    return greeting
|