sanali209 committed on
Commit
7c37660
·
verified ·
1 Parent(s): 2074c35

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +7 -3
main.py CHANGED
@@ -1,7 +1,7 @@
1
  from fastapi import FastAPI, UploadFile, File, HTTPException
2
- from docling.document_converter import DocumentConverter, PdfPipelineOptions
3
  from docling.datamodel.base_models import InputFormat
4
- from docling.datamodel.pipeline_options import EasyOcrOptions
5
  import shutil
6
  import os
7
  import tempfile
@@ -28,9 +28,13 @@ def get_converter():
28
  pipeline_options.ocr_options = EasyOcrOptions() # Use EasyOCR for robust recognition on images
29
 
30
  # Initialize converter with explicit formats and options
 
31
  _converter_instance = DocumentConverter(
32
  allowed_formats=[InputFormat.IMAGE, InputFormat.PDF],
33
- pipeline_options=pipeline_options
 
 
 
34
  )
35
  return _converter_instance
36
 
 
1
  from fastapi import FastAPI, UploadFile, File, HTTPException
2
+ from docling.document_converter import DocumentConverter, PdfFormatOption
3
  from docling.datamodel.base_models import InputFormat
4
+ from docling.datamodel.pipeline_options import PdfPipelineOptions, EasyOcrOptions
5
  import shutil
6
  import os
7
  import tempfile
 
28
  pipeline_options.ocr_options = EasyOcrOptions() # Use EasyOCR for robust recognition on images
29
 
30
  # Initialize converter with explicit formats and options
31
+ # Note: PdfPipelineOptions is passed via PdfFormatOption for both PDF and Image pipeline config in newer Docling versions
32
  _converter_instance = DocumentConverter(
33
  allowed_formats=[InputFormat.IMAGE, InputFormat.PDF],
34
+ format_options={
35
+ InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
36
+ InputFormat.IMAGE: PdfFormatOption(pipeline_options=pipeline_options)
37
+ }
38
  )
39
  return _converter_instance
40