Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,25 @@ from pathlib import Path
|
|
| 9 |
from fastapi import FastAPI, UploadFile, File, HTTPException
|
| 10 |
from docling.datamodel.base_models import InputFormat
|
| 11 |
from docling.document_converter import DocumentConverter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
app = FastAPI()
|
| 14 |
|
|
@@ -103,11 +122,7 @@ async def extract(file: UploadFile = File(...)):
|
|
| 103 |
"stream": stream
|
| 104 |
}
|
| 105 |
|
| 106 |
-
md = MarkItDown()
|
| 107 |
|
| 108 |
-
converter = DocumentConverter()
|
| 109 |
-
UPLOAD_DIR = Path("temp_uploads")
|
| 110 |
-
UPLOAD_DIR.mkdir(exist_ok=True)
|
| 111 |
|
| 112 |
@app.post("/docling")
|
| 113 |
async def convert_document(file: UploadFile = File(...)):
|
|
|
|
| 9 |
from fastapi import FastAPI, UploadFile, File, HTTPException
|
| 10 |
from docling.datamodel.base_models import InputFormat
|
| 11 |
from docling.document_converter import DocumentConverter
|
| 12 |
+
from docling.datamodel.pipeline_options import PdfPipelineOptions, EasyOcrOptions
|
| 13 |
+
from docling.document_converter import PdfFormatOption
|
| 14 |
+
from docling.datamodel.base_models import InputFormat
|
| 15 |
+
|
| 16 |
+
md = MarkItDown()
|
| 17 |
+
|
| 18 |
+
UPLOAD_DIR = Path("temp_uploads")
|
| 19 |
+
UPLOAD_DIR.mkdir(exist_ok=True)
|
| 20 |
+
|
| 21 |
+
pipeline_options = PdfPipelineOptions()
|
| 22 |
+
pipeline_options.ocr_options = EasyOcrOptions()
|
| 23 |
+
|
| 24 |
+
converter = DocumentConverter(
|
| 25 |
+
format_options={
|
| 26 |
+
InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
|
| 27 |
+
}
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
print("Converter initialized successfully with EasyOCR!")
|
| 31 |
|
| 32 |
app = FastAPI()
|
| 33 |
|
|
|
|
| 122 |
"stream": stream
|
| 123 |
}
|
| 124 |
|
|
|
|
| 125 |
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
@app.post("/docling")
|
| 128 |
async def convert_document(file: UploadFile = File(...)):
|