randusertry committed on
Commit
cf96d1d
·
verified ·
1 Parent(s): 296d8de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -4
app.py CHANGED
@@ -9,6 +9,25 @@ from pathlib import Path
9
  from fastapi import FastAPI, UploadFile, File, HTTPException
10
  from docling.datamodel.base_models import InputFormat
11
  from docling.document_converter import DocumentConverter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  app = FastAPI()
14
 
@@ -103,11 +122,7 @@ async def extract(file: UploadFile = File(...)):
103
  "stream": stream
104
  }
105
 
106
- md = MarkItDown()
107
 
108
- converter = DocumentConverter()
109
- UPLOAD_DIR = Path("temp_uploads")
110
- UPLOAD_DIR.mkdir(exist_ok=True)
111
 
112
  @app.post("/docling")
113
  async def convert_document(file: UploadFile = File(...)):
 
9
  from fastapi import FastAPI, UploadFile, File, HTTPException
10
  from docling.datamodel.base_models import InputFormat
11
  from docling.document_converter import DocumentConverter
12
+ from docling.datamodel.pipeline_options import PdfPipelineOptions, EasyOcrOptions
13
+ from docling.document_converter import PdfFormatOption
14
+ from docling.datamodel.base_models import InputFormat
15
+
16
+ md = MarkItDown()
17
+
18
+ UPLOAD_DIR = Path("temp_uploads")
19
+ UPLOAD_DIR.mkdir(exist_ok=True)
20
+
21
+ pipeline_options = PdfPipelineOptions()
22
+ pipeline_options.ocr_options = EasyOcrOptions()
23
+
24
+ converter = DocumentConverter(
25
+ format_options={
26
+ InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
27
+ }
28
+ )
29
+
30
+ print("Converter initialized successfully with EasyOCR!")
31
 
32
  app = FastAPI()
33
 
 
122
  "stream": stream
123
  }
124
 
 
125
 
 
 
 
126
 
127
  @app.post("/docling")
128
  async def convert_document(file: UploadFile = File(...)):