|
|
import os |
|
|
import subprocess as sp |
|
|
import tempfile |
|
|
from pathlib import Path |
|
|
from fastapi import FastAPI |
|
|
from fastapi import FastAPI, File, UploadFile |
|
|
from fastapi.middleware.cors import CORSMiddleware |
|
|
import shutil |
|
|
|
|
|
|
|
|
# Application instance shared by every route declared in this module.
app = FastAPI()

# Cross-origin policy: any origin may send POST requests with arbitrary
# request headers.
# Credentialed requests (cookies / HTTP auth) are deliberately NOT enabled:
# a wildcard origin must not be combined with allow_credentials=True, and
# none of the handlers visible in this module read cookies or auth headers.
# If credentials are ever needed, replace the "*" entries with explicit
# origins/headers first and only then turn allow_credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["POST"],
    allow_headers=["*"],
)
|
|
|
|
|
|
|
|
def extract_text(pdf_file):
    """
    Generate a text rendering of a PDF file with the ``pdftotext`` tool.

    The tool is invoked with ``-layout`` and writes its result to standard
    output (the trailing ``-`` argument); its standard error is discarded.

    :param pdf_file: path of the PDF file to convert.
    :return: the whole rendering as a single string (not a list of lines).
    :raises subprocess.CalledProcessError: if ``pdftotext`` exits with a
        non-zero status.
    :raises FileNotFoundError: if the ``pdftotext`` executable is not found.
    """
    # Ask for UTF-8 explicitly and decode as UTF-8 explicitly, so the result
    # depends neither on the tool's default output encoding nor on the
    # locale of the process running this service.
    args = ['pdftotext', '-layout', '-enc', 'UTF-8', pdf_file, '-']
    cp = sp.run(
        args, stdout=sp.PIPE, stderr=sp.DEVNULL,
        check=True, encoding='utf-8',
    )
    return cp.stdout
|
|
|
|
|
|
|
|
def save_and_export(pdf_file: UploadFile):
    """
    Copy an uploaded PDF to a temporary file and return its text rendering.

    :param pdf_file: the upload whose underlying ``.file`` object is read.
    :return: whatever ``extract_text`` returns for the temporary copy.

    The temporary file is removed when the ``with`` block exits.
    """
    with tempfile.NamedTemporaryFile() as tmp_file:
        shutil.copyfileobj(pdf_file.file, tmp_file)
        # The converter opens the file by name from another process, so
        # everything still sitting in Python's write buffer must reach the
        # file first; otherwise it may see a truncated (or empty) PDF.
        tmp_file.flush()
        resulting_text = extract_text(tmp_file.name)
    return resulting_text
|
|
|
|
|
|
|
|
@app.post('/extract_text')
async def app_extract_text(pdf_file: UploadFile) -> str:
    """
    Handle ``POST /extract_text``: return the text rendering of the upload.

    :param pdf_file: the uploaded PDF.
    :return: the result of ``save_and_export`` for that upload.
    """
    # Function-scope import keeps this handler self-contained.
    import asyncio

    # save_and_export copies a file and waits on an external process, both
    # of which block. Running it in the default executor keeps the event
    # loop free to serve other requests in the meantime.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, save_and_export, pdf_file)
|
|
|
|
|
|
|
|
@app.get("/")
def read_root():
    """
    Answer ``GET /`` with a fixed greeting payload.
    """
    greeting = {"Hello": "World"}
    return greeting
|
|
|