pdf2text / app.py
Andrei Shadrikov
CORS
d36be01
import os
import subprocess as sp
import tempfile
from pathlib import Path
from fastapi import FastAPI
from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
import shutil
# Application instance; the route decorators further down register on it.
app = FastAPI()

# CORS policy: any origin may call the API, but without credentials.
# The FastAPI CORS documentation states that allow_origins, allow_methods and
# allow_headers must not be ["*"] when allow_credentials is True; a wildcard
# origin combined with credentials would let every website issue
# cookie-bearing requests. Nothing in this module reads cookies or auth
# headers, so credentials are disabled rather than narrowing the origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["POST"],
    allow_headers=["*"],
)
def extract_text(pdf_file):
    """
    Render a PDF file as plain text using the ``pdftotext`` command-line tool.

    The tool is invoked with ``-layout`` and writes to stdout (the trailing
    ``-`` argument); anything it prints on stderr is discarded.

    Args:
        pdf_file: Path of the PDF file to convert.

    Returns:
        The extracted text as a single string.

    Raises:
        FileNotFoundError: If the ``pdftotext`` executable cannot be found.
        subprocess.CalledProcessError: If ``pdftotext`` exits with a non-zero
            status.
    """
    # Request UTF-8 output explicitly so the bytes produced by the tool always
    # match the codec used to decode them below, whatever the tool's default.
    args = ['pdftotext', '-layout', '-enc', 'UTF-8', pdf_file, '-']
    cp = sp.run(
        args, stdout=sp.PIPE, stderr=sp.DEVNULL,
        check=True, text=True,
        # text=True alone decodes with the locale's preferred encoding, which
        # garbles or rejects non-ASCII text under a non-UTF-8 locale.
        encoding='utf-8',
    )
    return cp.stdout
def save_and_export(pdf_file: UploadFile):
    """
    Copy an uploaded PDF to a temporary file and return its extracted text.

    Args:
        pdf_file: The upload whose underlying file object (``pdf_file.file``)
            is read from its current position to the end.

    Returns:
        Whatever ``extract_text`` returns for the temporary copy.

    Raises:
        Propagates any exception raised by ``extract_text``.
    """
    # The temporary file is removed automatically when the ``with`` exits.
    with tempfile.NamedTemporaryFile() as tmp_file:
        shutil.copyfileobj(pdf_file.file, tmp_file)
        # extract_text hands the file *name* to another process. Without a
        # flush, the data (all of it, for small uploads) may still sit in
        # Python's write buffer, so that process would read an empty or
        # truncated file.
        tmp_file.flush()
        resulting_text = extract_text(tmp_file.name)
    return resulting_text
@app.post('/extract_text')
async def app_extract_text(pdf_file: UploadFile) -> str:
    """
    Handle ``POST /extract_text``: return the text of the uploaded PDF.

    Args:
        pdf_file: The uploaded file to convert.

    Returns:
        The value produced by ``save_and_export`` for the upload.
    """
    # Imported here so this handler does not depend on the module's import
    # block being changed.
    from fastapi.concurrency import run_in_threadpool

    # save_and_export does blocking file I/O and waits on a subprocess.
    # Calling it directly inside a coroutine would stall the event loop for
    # every other request, so it is run in a worker thread instead.
    return await run_in_threadpool(save_and_export, pdf_file)
@app.get("/")
def read_root():
    """Answer ``GET /`` with a fixed greeting payload."""
    greeting = {"Hello": "World"}
    return greeting