Andrei Shadrikov committed on
Commit
1aeb427
·
1 Parent(s): 5b9d1ab
Files changed (3) hide show
  1. Dockerfile +14 -0
  2. app.py +40 -0
  3. requirements.txt +2 -0
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

# Refresh the package index before installing (it may be absent or stale in
# the base image) and pass -y so the install cannot stop at a confirmation
# prompt during a non-interactive build. The index is removed afterwards to
# keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends poppler-utils \
    && rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# The FastAPI object is defined in app.py, copied to the top of /code, so the
# import string is "app:app" (module app, attribute app).
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
14
+
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess as sp
3
+ import tempfile
4
+ from pathlib import Path
5
+ from fastapi import FastAPI
6
+ from fastapi import FastAPI, File, UploadFile
7
+ import aiofiles
8
+ import shutil
9
+
10
+
11
+ app = FastAPI()  # application object; the route decorators below register their handlers on it
12
+
13
+
14
def extract_text(pdf_file):
    """
    Produce a text rendering of a PDF file with the ``pdftotext`` tool.

    The tool is run with the ``-layout`` flag and ``-`` as its output target,
    so the rendering is read back from its standard output. Anything it
    writes to standard error is discarded.

    Args:
        pdf_file: Path of the PDF file passed to ``pdftotext``.

    Returns:
        The captured standard output, decoded as text, as one string.

    Raises:
        subprocess.CalledProcessError: If ``pdftotext`` exits with a
            non-zero status.
    """
    command = ['pdftotext', '-layout', pdf_file, '-']
    # check_output captures stdout and raises on a non-zero exit status.
    return sp.check_output(command, stderr=sp.DEVNULL, text=True)
24
+
25
+
26
def save_and_export(pdf_file: UploadFile):
    """
    Copy an uploaded PDF into a temporary file and return its extracted text.

    Args:
        pdf_file: The upload; its underlying ``file`` object is read from its
            current position to the end.

    Returns:
        The value returned by ``extract_text`` for the temporary copy.

    The temporary file is removed when the ``with`` block exits.
    """
    with tempfile.NamedTemporaryFile() as tmp_file:
        shutil.copyfileobj(pdf_file.file, tmp_file)
        # extract_text hands the file *name* to a separate process, which
        # reopens it from disk. Bytes still sitting in this handle's write
        # buffer would be invisible to it, so push them out first; otherwise
        # small uploads look empty and larger ones look truncated.
        tmp_file.flush()
        return extract_text(tmp_file.name)
31
+
32
+
33
@app.post('/extract_text')
async def app_extract_text(pdf_file: UploadFile) -> str:
    """
    Handle ``POST /extract_text`` by returning the text of the uploaded PDF.

    Args:
        pdf_file: The uploaded PDF document.

    Returns:
        The text produced by ``save_and_export`` for the upload.
    """
    # Imported locally so this handler does not depend on a file-level import.
    import asyncio

    # save_and_export performs blocking file copies and waits on a subprocess.
    # Calling it directly inside a coroutine would stall the event loop for
    # every other request, so it runs on the default thread pool instead.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, save_and_export, pdf_file)
36
+
37
+
38
@app.get("/")
def read_root():
    """Answer ``GET /`` with a fixed greeting mapping."""
    greeting = {"Hello": "World"}
    return greeting
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
fastapi
aiofiles
# ASGI server launched by the Dockerfile CMD.
uvicorn
# Required by FastAPI to parse multipart form uploads (UploadFile parameters).
python-multipart